% Bibliography entries on social tagging, web communities and web usage mining.
%
% Conventions used below:
%   - one field per line, values brace-delimited; bare integers and month
%     macros (apr, may) are left unquoted;
%   - non-ASCII characters are written as LaTeX escapes so the file works
%     under classic 8-bit BibTeX as well as Biber;
%   - interhash / intrahash are not standard BibTeX fields and are ignored by
%     standard styles (presumably BibSonomy export hashes -- verify before
%     removing).

@inproceedings{doerfel2014social,
  author    = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  title     = {How Social is Social Tagging?},
  booktitle = {Proceedings of the 23rd International World Wide Web Conference},
  series    = {WWW 2014},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = 2014,
  interhash = {9223d6d728612c8c05a80b5edceeb78b},
  intrahash = {11fab5468dd4b4e3db662ea5e68df8e0}
}

@inproceedings{koerner2010thinking,
  author    = {K{\"o}rner, Christian and Benz, Dominik and Strohmaier, Markus and Hotho, Andreas and Stumme, Gerd},
  title     = {Stop Thinking, start Tagging - Tag Semantics emerge from Collaborative Verbosity},
  booktitle = {Proceedings of the 19th International World Wide Web Conference (WWW 2010)},
  publisher = {ACM},
  address   = {Raleigh, NC, USA},
  month     = apr,
  year      = 2010,
  url       = {http://www.kde.cs.uni-kassel.de/benz/papers/2010/koerner2010thinking.pdf},
  abstract  = {Recent research provides evidence for the presence of emergent semantics in collaborative tagging systems. While several methods have been proposed, little is known about the factors that influence the evolution of semantic structures in these systems. A natural hypothesis is that the quality of the emergent semantics depends on the pragmatics of tagging: Users with certain usage patterns might contribute more to the resulting semantics than others. In this work, we propose several measures which enable a pragmatic differentiation of taggers by their degree of contribution to emerging semantic structures. We distinguish between categorizers, who typically use a small set of tags as a replacement for hierarchical classification schemes, and describers, who are annotating resources with a wealth of freely associated, descriptive keywords. To study our hypothesis, we apply semantic similarity measures to 64 different partitions of a real-world and large-scale folksonomy containing different ratios of categorizers and describers. Our results not only show that `verbose' taggers are most useful for the emergence of tag semantics, but also that a subset containing only 40\% of the most `verbose' taggers can produce results that match and even outperform the semantic precision obtained from the whole dataset. Moreover, the results suggest that there exists a causal link between the pragmatics of tagging and resulting emergent semantics.
    This work is relevant for designers and analysts of tagging systems interested (i) in fostering the semantic development of their platforms, (ii) in identifying users introducing ``semantic noise'', and (iii) in learning ontologies.},
  interhash = {5afe6e4ce8357d8ac9698060fb438468},
  intrahash = {45f8d8f2a8251a5e988c596a5ebb3f2d}
}

% NOTE(review): the conference ordinal (16th) and acronym in booktitle are
% inferred from the www2007.org URL and the numbering used by the other
% WWW entries in this file -- confirm against the published proceedings.
@inproceedings{yon2007,
  author    = {Dourisboure, Yon and Geraci, Filippo and Pellegrini, Marco},
  title     = {Extraction and Classification of Dense Communities in the {Web}},
  booktitle = {Proceedings of the 16th International World Wide Web Conference (WWW 2007)},
  year      = 2007,
  url       = {http://www2007.org/program/paper.php?id=15},
  abstract  = {The World Wide Web (WWW) is rapidly becoming important for society as a medium for sharing data, information and services, and there is a growing interest in tools for understanding collective behaviors and emerging phenomena in the WWW. In this paper we focus on the problem of searching and classifying {\em communities} in the web. Loosely speaking a community is a group of pages related to a common interest. More formally communities have been associated in the computer science literature with the existence of a locally dense sub-graph of the web-graph (where web pages are nodes and hyper-links are arcs of the web-graph). The core of our contribution is a new scalable algorithm for finding relatively dense subgraphs in massive graphs. We apply our algorithm on web-graphs built on three publicly available large crawls of the web (with raw sizes up to 120M nodes and 1G arcs). The effectiveness of our algorithm in finding dense subgraphs is demonstrated experimentally by embedding artificial communities in the web-graph and counting how many of these are blindly found.
    Effectiveness increases with the size and density of the communities: it is close to 100\% for communities of a thirty nodes or more (even at low density). It is still about 80\% even for communities of twenty nodes with density over $50\%$ of the arcs present. At the lower extremes the algorithm catches 35\% of dense communities made of ten nodes. We complete our Community Watch system by clustering the communities found in the web-graph into homogeneous groups by topic and labelling each group by representative keywords.},
  interhash = {a07d927ef48ae1b8f3338541857c5a34},
  intrahash = {480a63c3e6847dc8a9ebd3de040501db}
}

% The purely numeric key is kept unchanged so that existing citations of
% this entry keep resolving.
@inproceedings{65,
  author    = {Dean, J. and Henzinger, M. R.},
  title     = {Finding related pages in the {World Wide Web}},
  booktitle = {Proceedings of the Eighth International World Wide Web Conference WWW-1999},
  address   = {Toronto},
  month     = may,
  year      = 1999,
  isbn      = {90-74821-43-X},
  interhash = {0faea7176f7344dcdb3e6906dcc58048},
  intrahash = {7d3c70d55c118425216a7375f749c2f2}
}

% NOTE(review): location is not a standard field for an article entry in
% classic BibTeX and is ignored by the standard styles; kept as exported.
@article{Cooleyetal99,
  author    = {Cooley, R. and Mobasher, B. and Srivastava, J.},
  title     = {Data preparation for mining {World Wide Web} browsing patterns},
  journal   = {Journal of Knowledge and Information Systems},
  volume    = 1,
  number    = 1,
  pages     = {5--32},
  year      = 1999,
  location  = {Santa Barbara, CA},
  interhash = {68b1e11110e6498699524008fe67f8c1},
  intrahash = {e515dc2a8adbc7fa84b7fe968b61391e}
}