% Bibliography: spam detection in social bookmarking systems and on the web.
%
% Layout: one entry per work and key, one field per line.
% Fields such as interhash, intrahash, acmid, numpages, articleno, file,
% groups, username, timestamp, preprint and vg-wort are not used by the
% standard styles; they are kept as metadata only (presumably written by the
% exporting tool -- confirm before relying on them).
%
% Every citation key that existed before is still defined exactly once, so
% existing cite commands keep resolving. Where a key was repeated, the copies
% were merged into the first occurrence (the one BibTeX already used).
%
% NOTE(review): krause2008antisocial, krause2008antisocialb and krause2008anti
% describe the same paper. All three keys are retained for backward
% compatibility; prefer krause2008antisocial in new documents.

@inproceedings{krause2008antisocial,
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  title     = {The Anti-social Tagger: Detecting Spam in Social Bookmarking Systems},
  booktitle = {Proceedings of the 4th International Workshop on Adversarial Information Retrieval on the Web},
  series    = {AIRWeb '08},
  location  = {Beijing, China},
  pages     = {61--68},
  numpages  = {8},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2008,
  month     = apr,
  isbn      = {978-1-60558-159-0},
  doi       = {10.1145/1451983.1451998},
  url       = {http://doi.acm.org/10.1145/1451983.1451998},
  preprint  = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  acmid     = {1451998},
  file      = {krause2008antisocial.pdf:krause2008antisocial.pdf:PDF},
  groups    = {public},
  username  = {dbenz},
  timestamp = {2010-11-10 15:35:25},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {50c22098a7a85b1e43e7e4df1d8a3e7a},
  abstract  = {The annotation of web sites in social bookmarking systems has become a popular way to manage and find information on the web. The community structure of such systems attracts spammers: recent post pages, popular pages or specific tag pages can be manipulated easily. As a result, searching or tracking recent posts does not deliver quality results annotated in the community, but rather unsolicited, often commercial, web sites. To retain the benefits of sharing one's web content, spam-fighting mechanisms that can face the flexible strategies of spammers need to be developed. A classical approach in machine learning is to determine relevant features that describe the system's users, train different classifiers with the selected features and choose the one with the most promising evaluation results. In this paper we will transfer this approach to a social bookmarking setting to identify spammers. We will present features considering the topological, semantic and profile-based information which people make public when using the system. The dataset used is a snapshot of the social bookmarking system BibSonomy and was built over the course of several months when cleaning the system from spam. Based on our features, we will learn a large set of different classification models and compare their performance. Our results represent the groundwork for a first application in BibSonomy and for the building of more elaborate spam detection mechanisms.},
}

% NOTE(review): originally typed as an article without a journal. The
% booktitle and series below were added during clean-up -- confirm against
% the publisher record.
@inproceedings{gyongyi2004combating,
  author    = {Gyöngyi, Zoltán and Garcia-Molina, Hector and Pedersen, Jan},
  title     = {Combating Web Spam with {TrustRank}},
  booktitle = {Proceedings of the 30th International Conference on Very Large Data Bases},
  series    = {VLDB '04},
  pages     = {576--587},
  year      = 2004,
  url       = {http://citeseer.uark.edu:8080/citeseerx/viewdoc/summary?doi=10.1.1.122.8129},
  interhash = {428bfe5366151d07a234864481364e60},
  intrahash = {cb480eab1c421d316fb8cb83f9688af3},
}

@inproceedings{chung2009study,
  author    = {Chung, Young-joo and Toyoda, Masashi and Kitsuregawa, Masaru},
  title     = {A study of link farm distribution and evolution using a time series of web snapshots},
  booktitle = {Proceedings of the 5th International Workshop on Adversarial Information Retrieval on the Web},
  series    = {AIRWeb '09},
  location  = {Madrid, Spain},
  pages     = {9--16},
  numpages  = {8},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2009,
  isbn      = {978-1-60558-438-6},
  doi       = {10.1145/1531914.1531917},
  url       = {http://doi.acm.org/10.1145/1531914.1531917},
  acmid     = {1531917},
  interhash = {1a0eff19d17ebb60bcbc4f7c6ccc460a},
  intrahash = {d36b62eab48a830b4ac9da825a4929a5},
  abstract  = {In this paper, we study the overall link-based spam structure and its evolution which would be helpful for the development of robust analysis tools and research for Web spamming as a social activity in the cyber space. First, we use strongly connected component (SCC) decomposition to separate many link farms from the largest SCC, so called the core. We show that denser link farms in the core can be extracted by node filtering and recursive application of SCC decomposition to the core. Surprisingly, we can find new large link farms during each iteration and this trend continues until at least 10 iterations. In addition, we measure the spamicity of such link farms. Next, the evolution of link farms is examined over two years. Results show that almost all large link farms do not grow anymore while some of them shrink, and many large link farms are created in one year.},
}

@inproceedings{benczur2008survey,
  author    = {Benczúr, András A. and Siklósi, Dávid and Szabó, Jácint and Bíró, István and Fekete, Zsolt and Kurucz, Miklós and Pereszlényi, Attila and Rácz, Simon and Szabó, Adrienn},
  title     = {Web Spam: a Survey with Vision for the Archivist},
  booktitle = {Proceedings of the 8th International Web Archiving Workshop IWAW'08},
  address   = {Aarhus, Denmark},
  year      = 2008,
  month     = sep,
  url       = {http://iwaw.europarchive.org/08/IWAW2008-Benczur.pdf},
  interhash = {b09d09a4d29ba2a80a5a29b9a76ed5f0},
  intrahash = {911a912a75e50451923522223f7717e8},
  abstract  = {While Web archive quality is endangered by Web spam, a side effect of the high commercial value of top-ranked search-engine results, so far Web spam filtering technologies are rarely used by Web archivists. In this paper we make the first attempt to disseminate existing methodology and envision a solution for Web archives to share knowledge and unite efforts in Web spam hunting. We survey the state of the art in Web spam filtering illustrated by the recent Web spam challenge data sets and techniques and describe the filtering solution for archives envisioned in the LiWA—Living Web Archives project.},
}

@inproceedings{bullock2011privacyaware,
  author    = {Bullock, Beate Navarro and Lerch, Hana and Ro{\ss}nagel, Alexander and Hotho, Andreas and Stumme, Gerd},
  title     = {Privacy-aware spam detection in social bookmarking systems},
  booktitle = {Proceedings of the 11th International Conference on Knowledge Management and Knowledge Technologies},
  series    = {i-KNOW '11},
  location  = {Graz, Austria},
  pages     = {15:1--15:8},
  articleno = {15},
  numpages  = {8},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2011,
  isbn      = {978-1-4503-0732-1},
  doi       = {10.1145/2024288.2024306},
  url       = {http://doi.acm.org/10.1145/2024288.2024306},
  acmid     = {2024306},
  interhash = {7a2d6a35c124ea0fe31c962f8f150916},
  intrahash = {00a8f31185a34957eb16d500d7d51398},
  abstract  = {With the increased popularity of Web 2.0 services in the last years data privacy has become a major concern for users. The more personal data users reveal, the more difficult it becomes to control its disclosure in the web. However, for Web 2.0 service providers, the data provided by users is a valuable source for offering effective, personalised data mining services. One major application is the detection of spam in social bookmarking systems: in order to prevent a decrease of content quality, providers need to distinguish spammers and exclude them from the system. They thereby experience a conflict of interests: on the one hand, they need to identify spammers based on the information they collect about users, on the other hand, they need to respect privacy concerns and process as few personal data as possible. It would therefore be of tremendous help for system developers and users to know which personal data are needed for spam detection and which can be ignored. In this paper we address these questions by presenting a data privacy aware feature engineering approach. It consists of the design of features for spam classification which are evaluated according to both, performance and privacy conditions. Experiments using data from the social bookmarking system BibSonomy show that both conditions must not exclude each other.},
}

% German title: nouns are brace-protected so sentence-casing styles do not
% lowercase them.
@article{lerch2010datenschutz,
  author    = {Lerch, Hana and Krause, Beate and Hotho, Andreas and Roßnagel, Alexander and Stumme, Gerd},
  title     = {{Social Bookmarking-Systeme} -- die unerkannten {Datensammler} -- {Ungewollte} personenbezogene {Datenverarbeitung}?},
  journal   = {MultiMedia und Recht},
  volume    = 7,
  pages     = {454--458},
  year      = 2010,
  interhash = {bbe328f35326b84db30c14648c176384},
  intrahash = {fc44b1bdc724bbda45d08e35cba8b0ec},
}

% Alias key for the same paper as krause2008antisocial (kept so existing
% citations of this key still resolve).
@inproceedings{krause2008antisocialb,
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  title     = {The Anti-Social Tagger: Detecting Spam in Social Bookmarking Systems},
  booktitle = {Proceedings of the 4th International Workshop on Adversarial Information Retrieval on the Web},
  year      = 2008,
  url       = {http://airweb.cse.lehigh.edu/2008/submissions/krause_2008_anti_social_tagger.pdf},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {6357f535000a383f228f1e8e56ca86ca},
}

% NOTE(review): the url is a citation-export link, not a link to the paper,
% and the publisher value looks like an export artefact -- verify and replace.
@article{kolari2006blog,
  author    = {Kolari, P. and Java, A. and Finin, T. and Mayfield, J. and Joshi, A. and Martineau, J.},
  title     = {Blog track open task: Spam blog classification},
  journal   = {TREC 2006 Blog Track Notebook},
  publisher = {Citeseer},
  year      = 2006,
  url       = {http://scholar.google.com/scholar.bib?q=info:BXvRJMPpbFUJ:scholar.google.com/&output=citation&hl=en&as_sdt=2000&as_vis=1&ct=citation&cd=10},
  interhash = {22f376a3a5e2ee890908d81f409fc08c},
  intrahash = {e8d9c31822799d4d862a4bbcd885a4cf},
}

% A second record with this key (author spelled with u-umlaut, title with a
% dash instead of the question mark) was folded into this one.
@inproceedings{atze09,
  author    = {Atzmueller, Martin and Lemmerich, Florian and Krause, Beate and Hotho, Andreas},
  title     = {Who are the Spammers? Understandable Local Patterns for Concept Description},
  booktitle = {7th Conference on Computer Methods and Systems},
  address   = {Krakow, Poland},
  year      = 2009,
  month     = nov,
  isbn      = {83-916420-5-4},
  url       = {http://www.cms.agh.edu.pl/},
  interhash = {c226a55c0cc2dc6f261b86c09225c260},
  intrahash = {014dbd07807e05a5ea9aafb2dbead39b},
}

% NOTE(review): the note still says "accepted" although a paper url exists --
% confirm publication status and drop the note if it is stale.
@inproceedings{ALKH:09,
  author    = {Atzmueller, Martin and Lemmerich, Florian and Krause, Beate and Hotho, Andreas},
  title     = {Towards Understanding Spammers -- Discovering Local Patterns for Concept Characterization and Description},
  booktitle = {Proceedings of {LeGo-09}: From Local Patterns to Global Models, Workshop at the 2009 European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
  editor    = {F{\"u}rnkranz, Johannes and Knobbe, Arno},
  year      = 2009,
  note      = {accepted},
  url       = {http://www.ke.tu-darmstadt.de/events/LeGo-09/04-Atzmueller.pdf},
  interhash = {d27cd7eee4ab571ad3753a3d370141ce},
  intrahash = {bb80bdcc06c8886968c453fd920dfe05},
}

% Alias key for the same paper as krause2008antisocial (kept so existing
% citations of this key still resolve).
@inproceedings{krause2008anti,
  author    = {Krause, Beate and Schmitz, Christoph and Hotho, Andreas and Stumme, Gerd},
  title     = {The Anti-social Tagger: Detecting Spam in Social Bookmarking Systems},
  booktitle = {Proceedings of the 4th International Workshop on Adversarial Information Retrieval on the Web},
  series    = {AIRWeb '08},
  location  = {Beijing, China},
  pages     = {61--68},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2008,
  isbn      = {978-1-60558-159-0},
  doi       = {10.1145/1451983.1451998},
  vg-wort   = {29.6},
  interhash = {a45d40ac7776551301ad9dde5b25357f},
  intrahash = {93af1487543e0395ee8f7e0413bbcfc1},
}

@inproceedings{lam2004shilling,
  author    = {Lam, Shyong K. and Riedl, John},
  title     = {Shilling recommender systems for fun and profit},
  booktitle = {WWW '04: Proceedings of the 13th International Conference on World Wide Web},
  location  = {New York, NY, USA},
  pages     = {393--402},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2004,
  isbn      = {1-58113-844-X},
  doi       = {10.1145/988672.988726},
  url       = {https://doi.org/10.1145/988672.988726},
  interhash = {66e00212d44132e4d2ff6968a10999d4},
  intrahash = {fa20593a49577529fdde250fc6d15110},
  abstract  = {Recommender systems have emerged in the past several years as an effective way to help people cope with the problem of information overload. One application in which they have become particularly common is in e-commerce, where recommendation of items can often help a customer find what she is interested in and, therefore can help drive sales. Unscrupulous producers in the never-ending quest for market penetration may find it profitable to shill recommender systems by lying to the systems in order to have their products recommended more often than those of their competitors. This paper explores four open questions that may affect the effectiveness of such shilling attacks: which recommender algorithm is being used, whether the application is producing recommendations or predictions, how detectable the attacks are by the operator of the system, and what the properties are of the items being attacked. The questions are explored experimentally on a large data set of movie ratings. Taken together, the results of the paper suggest that new ways must be used to evaluate and detect shilling attacks on recommender systems.},
}