% Bibliography database: query-log analysis, social bookmarking/tagging and
% user behaviour in online social networks.
%
% Conventions used in this file
%   * one field per line, abstract last; citation keys are stable identifiers
%   * `doi` holds the bare DOI (no resolver prefix); `url` keeps the raw URL
%   * page ranges use `--`
%   * non-ASCII characters are written as LaTeX escapes, with accents wrapped
%     as BibTeX special characters, e.g. {\"a}, so 8-bit BibTeX sorts correctly
%   * LaTeX specials (ampersand, percent sign) are escaped in text fields,
%     but left verbatim inside `url`
%   * every work appears exactly once (a repeated key is a BibTeX error)
%   * fields such as interhash, intrahash, acmid, vgwort and ee are export
%     bookkeeping; standard styles ignore them

% Shared names, defined once so that all entries spell them identically.
@string{LNCS = {Lecture Notes in Computer Science}}
@string{TWEB = {ACM Transactions on the Web}}

@inproceedings{1135858,
  author        = {Zhao, Qiankun and Hoi, Steven C. H. and Liu, Tie-Yan and Bhowmick, Sourav S. and Lyu, Michael R. and Ma, Wei-Ying},
  title         = {Time-dependent semantic similarity measure of queries using historical click-through data},
  booktitle     = {WWW '06: Proceedings of the 15th international conference on World Wide Web},
  pages         = {543--552},
  publisher     = {ACM Press},
  address       = {New York, NY, USA},
  location      = {Edinburgh, Scotland},
  year          = 2006,
  isbn          = {1-59593-323-9},
  doi           = {10.1145/1135777.1135858},
  url           = {http://portal.acm.org/citation.cfm?id=1135858},
  interhash     = {c765e101c37f6b530e2c1c59808048d7},
  intrahash     = {57cbc64550d3a1b5b8599a0783e95111},
  abstract      = {It has become a promising direction to measure similarity of Web search queries by mining the increasing amount of click-through data logged by Web search engines, which record the interactions between users and the search engines. Most existing approaches employ the click-through data for similarity measure of queries with little consideration of the temporal factor, while the click-through data is often dynamic and contains rich temporal information. In this paper we present a new framework of time-dependent query semantic similarity model on exploiting the temporal characteristics of historical click-through data. The intuition is that more accurate semantic similarity values between queries can be obtained by taking into account the timestamps of the log data. With a set of user-defined calendar schema and calendar patterns, our time-dependent query similarity model is constructed using the marginalized kernel technique, which can exploit both explicit similarity and implicit semantics from the click-through data effectively. Experimental results on a large set of click-through data acquired from a commercial search engine show that our time-dependent query similarity model is more accurate than the existing approaches. Moreover, we observe that our time-dependent query similarity model can, to some extent, reflect real-world semantics such as real-world events that are happening over time.},
}

@inproceedings{1281204,
  author        = {Baeza-Yates, Ricardo and Tiberi, Alessandro},
  title         = {Extracting semantic relations from query logs},
  booktitle     = {KDD '07: Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining},
  pages         = {76--85},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  location      = {San Jose, California, USA},
  year          = 2007,
  isbn          = {978-1-59593-609-7},
  doi           = {10.1145/1281192.1281204},
  url           = {http://portal.acm.org/citation.cfm?id=1281204},
  interhash     = {26ca034be705abaf072835784f53d877},
  intrahash     = {6e45b65feffd1545c6dca62bf4b8f53d},
}

% The proceedings title belongs in `booktitle` (required for this entry
% type); the export had placed it, inverted, in `journal`.
@inproceedings{4597173,
  author        = {Zhuang, Z. and Cucerzan, S.},
  title         = {Exploiting Semantic Query Context to Improve Search Ranking},
  booktitle     = {2008 IEEE International Conference on Semantic Computing},
  pages         = {50--57},
  month         = aug,
  year          = 2008,
  doi           = {10.1109/ICSC.2008.8},
  interhash     = {fd70fee1920ea227a8c336fe80e2ba71},
  intrahash     = {8c2005e1dea667cdd23a8e5c7efe9243},
  abstract      = {One challenge for relevance ranking in Web search is underspecified queries. For such queries, top-ranked documents may contain information irrelevant to the search goal of the user; some newly-created relevant documents are ranked lower due to their freshness and to the large number of existing documents that match the queries. To improve the relevance ranking for underspecified queries requires better understanding of users' search goals. By analyzing the semantic query context extracted from the query logs, we propose Q-Rank to effectively improve the ranking of search results for a given query. Experiments show that Q-Rank outperforms the current ranking system of a large-scale commercial Web search engine, improving the relevance ranking for 82\% of the queries with an average increase of 8.99\% in terms of discounted cumulative gains. Because Q-Rank is independent of the underlying ranking algorithm, it can be integrated with existing search engines.},
}

% NOTE(review): `series` was missing although `volume` was given; volume 4956
% is taken to be the LNCS volume number -- confirm against the publisher record.
@inproceedings{krause2008comparison,
  author        = {Krause, Beate and Hotho, Andreas and Stumme, Gerd},
  title         = {A Comparison of Social Bookmarking with Traditional Search},
  booktitle     = {Advances in Information Retrieval, 30th European Conference on IR Research, ECIR 2008},
  series        = LNCS,
  volume        = 4956,
  pages         = {101--113},
  publisher     = {Springer},
  year          = 2008,
  url           = {http://www.kde.cs.uni-kassel.de/hotho/pub/2008/ecir2008krause.pdf},
  interhash     = {37598733b747093d97a0840a11beebf5},
  intrahash     = {613f5c41ff759fc548c9085102d1c933},
  abstract      = {Social bookmarking systems allow users to store links to internet resources on a web page. As social bookmarking systems are growing in popularity, search algorithms have been developed that transfer the idea of link-based rankings in the Web to a social bookmarking system's data structure. These rankings differ from traditional search engine rankings in that they incorporate the rating of users. In this study, we compare search in social bookmarking systems with traditional Web search. In the first part, we compare the user activity and behaviour in both kinds of systems, as well as the overlap of the underlying sets of URLs. In the second part, we compare graph-based and vector space rankings for social bookmarking systems with commercial search engine rankings. Our experiments are performed on data of the social bookmarking system Del.icio.us and on rankings and log data from Google, MSN, and AOL. We will show that part of the difference between the systems is due to different behaviour (e. g., the concatenation of multi-word lexems to single terms in Del.icio.us), and that real-world events may trigger similar behaviour in both kinds of systems. We will also show that a graph-based ranking approach on folksonomies yields results that are closer to the rankings of the commercial search engines than vector space retrieval, and that the correlation is high in particular for the domains that are well covered by the social bookmarking system.},
}

@inproceedings{krause2008logsonomy,
  author        = {Krause, Beate and J{\"a}schke, Robert and Hotho, Andreas and Stumme, Gerd},
  title         = {Logsonomy - Social Information Retrieval with Logdata},
  booktitle     = {HT '08: Proceedings of the Nineteenth ACM Conference on Hypertext and Hypermedia},
  pages         = {157--166},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  location      = {Pittsburgh, PA, USA},
  year          = 2008,
  isbn          = {978-1-59593-985-2},
  doi           = {10.1145/1379092.1379123},
  url           = {http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Journals&title=Proceedings%20of%20the%20nineteenth%20ACM%20conference%20on%20Hypertext%20and%20hypermedia},
  vgwort        = {17},
  interhash     = {6d34ea1823d95b9dbf37d4db4d125d2a},
  intrahash     = {e64d14f3207766f4afc65983fa759ffe},
  abstract      = {Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and on query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.},
}

% This entry was present twice under the same key; one copy is kept.
@article{benz2010query,
  author        = {Benz, Dominik and Hotho, Andreas and J{\"a}schke, Robert and Krause, Beate and Stumme, Gerd},
  title         = {Query Logs as Folksonomies},
  journal       = {Datenbank-Spektrum},
  volume        = 10,
  number        = 1,
  pages         = {15--24},
  month         = jun,
  year          = 2010,
  doi           = {10.1007/s13222-010-0004-8},
  url           = {http://dx.doi.org/10.1007/s13222-010-0004-8},
  interhash     = {dae3931a5f445dc67bf111b26f753c36},
  intrahash     = {bf96c01262d15fb6eaaf558ecb9a9e69},
  abstract      = {Query logs provide a valuable resource for preference information in search. A user clicking on a specific resource after submitting a query indicates that the resource has some relevance with respect to the query. To leverage the information of query logs, one can relate submitted queries from specific users to their clicked resources and build a tripartite graph of users, resources and queries. This graph resembles the folksonomy structure of social bookmarking systems, where users add tags to resources. In this article, we summarize our work on building folksonomies from query log files. The focus is on three comparative studies of the system's content, structure and semantics. Our results show that query logs incorporate typical folksonomy properties and that approaches to leverage the inherent semantics of folksonomies can be applied to query logs as well.},
}

% Conference paper: the export typed it as a journal article and put the
% conference name (inverted) in `journal`.
@inproceedings{strohmaier2008acquiring,
  author        = {Strohmaier, Markus and Prettenhofer, Peter and Kr{\"o}ll, Mark},
  title         = {Acquiring Explicit User Goals from Search Query Logs},
  booktitle     = {IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology},
  volume        = 3,
  pages         = {602--605},
  publisher     = {IEEE Computer Society},
  address       = {Los Alamitos, CA, USA},
  year          = 2008,
  isbn          = {978-0-7695-3496-1},
  doi           = {10.1109/WIIAT.2008.364},
  url           = {http://doi.ieeecomputersociety.org/10.1109/WIIAT.2008.364},
  interhash     = {c3708f588c62e94de5c056e708a24699},
  intrahash     = {120d307a0a7391dc6a273a6e28743360},
}

% NOTE(review): the crossref parent conf/spire/2008 is not defined in the part
% of the file visible here; classic BibTeX needs it to appear after this entry.
@inproceedings{francisco2008clique,
  author        = {Francisco, Alexandre P. and Baeza-Yates, Ricardo A. and Oliveira, Arlindo L.},
  title         = {Clique Analysis of Query Log Graphs},
  booktitle     = {SPIRE},
  crossref      = {conf/spire/2008},
  editor        = {Amir, Amihood and Turpin, Andrew and Moffat, Alistair},
  series        = LNCS,
  volume        = 5280,
  pages         = {188--199},
  publisher     = {Springer},
  date          = {2008-11-24},
  year          = 2008,
  isbn          = {978-3-540-89096-6},
  doi           = {10.1007/978-3-540-89097-3_19},
  ee            = {http://dx.doi.org/10.1007/978-3-540-89097-3_19},
  url           = {http://dblp.uni-trier.de/db/conf/spire/spire2008.html#FranciscoBO08},
  interhash     = {3df2855b8d7c408298efcda601eac68c},
  intrahash     = {42101c15dfe2345542627b51e623b165},
}

@article{nicholas2005scholarly,
  author        = {Nicholas, David and Huntington, Paul and Watkinson, Anthony},
  title         = {Scholarly journal usage: the results of deep log analysis},
  journal       = {Journal of Documentation},
  volume        = 61,
  number        = 2,
  pages         = {248--280},
  publisher     = {Emerald Group Publishing Limited},
  address       = {Bingley},
  year          = 2005,
  url           = {http://www.emeraldinsight.com/journals.htm?articleid=1465024&show=abstract},
  interhash     = {10580bc4cff2d14ca913b1682e728c9a},
  intrahash     = {8e733e3b55a1a648c6e5070d347c43dc},
}

@article{silverstein1999analysis,
  author        = {Silverstein, Craig and Marais, Hannes and Henzinger, Monika and Moricz, Michael},
  title         = {Analysis of a very large web search engine query log},
  journal       = {SIGIR Forum},
  volume        = 33,
  number        = 1,
  pages         = {6--12},
  numpages      = {7},
  month         = sep,
  year          = 1999,
  issue_date    = {Fall 1999},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  issn          = {0163-5840},
  doi           = {10.1145/331403.331405},
  url           = {http://doi.acm.org/10.1145/331403.331405},
  acmid         = {331405},
  interhash     = {5e26846be504d4fc6b6a7b236c1c023a},
  intrahash     = {4ac734beeccbcb3a05786e8ca57f5629},
  abstract      = {In this paper we present an analysis of an AltaVista Search Engine query log consisting of approximately 1 billion entries for search requests over a period of six weeks. This represents almost 285 million user sessions, each an attempt to fill a single information need. We present an analysis of individual queries, query duplication, and query sessions. We also present results of a correlation analysis of the log entries, studying the interaction of terms within queries. Our data supports the conjecture that web users differ significantly from the user assumed in the standard information retrieval literature. Specifically, we show that web users type in short queries, mostly look at the first 10 results only, and seldom modify the query. This suggests that traditional information retrieval techniques may not work well for answering web search requests. The correlation analysis showed that the most highly correlated items are constituents of phrases. This result indicates it may be useful for search engines to consider search terms as parts of phrases even if the user did not explicitly specify them as such.},
}

% The ampersand in the journal name must be escaped: `journal` is typeset.
@article{jansen2006search,
  author        = {Jansen, Bernard J.},
  title         = {Search log analysis: What it is, what's been done, how to do it},
  journal       = {Library \& Information Science Research},
  volume        = 28,
  number        = 3,
  pages         = {407--432},
  year          = 2006,
  issn          = {0740-8188},
  doi           = {10.1016/j.lisr.2006.06.005},
  url           = {http://www.sciencedirect.com/science/article/pii/S0740818806000673},
  interhash     = {0488e60c424ea821ee7b3e3760ffd115},
  intrahash     = {e147f866b624d461c77a24b79b2d9aff},
  abstract      = {The use of data stored in transaction logs of Web search engines, Intranets, and Web sites can provide valuable insight into understanding the information-searching process of online searchers. This understanding can enlighten information system design, interface development, and devising the information architecture for content collections. This article presents a review and foundation for conducting Web search transaction log analysis. A methodology is outlined consisting of three stages, which are collection, preparation, and analysis. The three stages of the methodology are presented in detail with discussions of goals, metrics, and processes at each stage. Critical terms in transaction log analysis for Web searching are defined. The strengths and limitations of transaction log analysis as a research method are presented. An application to log client-side interactions that supplements transaction logs is reported on, and the application is made available for use by the research community. Suggestions are provided on ways to leverage the strengths of, while addressing the limitations of, transaction log analysis for Web-searching research. Finally, a complete flat text transaction log from a commercial search engine is available as supplementary material with this manuscript.},
}

@inproceedings{abrams1998information,
  author        = {Abrams, David and Baecker, Ron and Chignell, Mark},
  title         = {Information archiving with bookmarks: personal {Web} space construction and organization},
  booktitle     = {Proceedings of the SIGCHI Conference on Human Factors in Computing Systems},
  series        = {CHI '98},
  pages         = {41--48},
  numpages      = {8},
  publisher     = {ACM Press/Addison-Wesley Publishing Co.},
  address       = {New York, NY, USA},
  location      = {Los Angeles, California, USA},
  year          = 1998,
  isbn          = {0-201-30987-4},
  doi           = {10.1145/274644.274651},
  url           = {http://dx.doi.org/10.1145/274644.274651},
  acmid         = {274651},
  interhash     = {fbb2704604de0954b432c8615a0abf5b},
  intrahash     = {a9a25a144cec844bcd7daeace4a548aa},
}

% arXiv preprint: typed as misc because no institution is recorded (required
% for a tech report). The identifier is stored in eprint/archiveprefix; the
% note is kept so that styles without eprint support still print it.
@misc{doerfel2014course,
  author        = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  title         = {Of course we share! Testing Assumptions about Social Tagging Systems},
  year          = 2014,
  eprint        = {1401.0629},
  archiveprefix = {arXiv},
  note          = {arXiv:1401.0629},
  url           = {http://arxiv.org/abs/1401.0629},
  interhash     = {65f287480af20fc407f7d26677f17b72},
  intrahash     = {e360f0bd207806e72305efe16491ebe3},
  abstract      = {Social tagging systems have established themselves as an important part in today's web and have attracted the interest from our research community in a variety of investigations. The overall vision of our community is that simply through interactions with the system, i.e., through tagging and sharing of resources, users would contribute to building useful semantic structures as well as resource indexes using uncontrolled vocabulary not only due to the easy-to-use mechanics. Henceforth, a variety of assumptions about social tagging systems have emerged, yet testing them has been difficult due to the absence of suitable data. In this work we thoroughly investigate three available assumptions - e.g., is a tagging system really social? - by examining live log data gathered from the real-world public social tagging system BibSonomy. Our empirical results indicate that while some of these assumptions hold to a certain extent, other assumptions need to be reflected and viewed in a very critical light. Our observations have implications for the design of future search and other algorithms to better reflect the actual user behavior.},
}

@incollection{millen2007social,
  author        = {Millen, David R. and Yang, Meng and Whittaker, Steven and Feinberg, Jonathan},
  title         = {Social bookmarking and exploratory search},
  booktitle     = {ECSCW 2007},
  editor        = {Bannon, Liam J. and Wagner, Ina and Gutwin, Carl and Harper, Richard H. R. and Schmidt, Kjeld},
  pages         = {21--40},
  publisher     = {Springer London},
  year          = 2007,
  isbn          = {978-1-84800-030-8},
  doi           = {10.1007/978-1-84800-031-5_2},
  url           = {http://dx.doi.org/10.1007/978-1-84800-031-5_2},
  interhash     = {8f316838202ce31c603f8576a56532ff},
  intrahash     = {08aa0611b1f4e01f2dfd760dc5969b82},
  abstract      = {In this paper, we explore various search tasks that are supported by a social bookmarking service. These bookmarking services hold great potential to powerfully combine personal tagging of information sources with interactive browsing, resulting in better social navigation. While there has been considerable interest in social tagging systems in recent years, little is known about their actual usage. In this paper, we present the results of a field study of a social bookmarking service that has been deployed in a large enterprise. We present new qualitative and quantitative data on how a corporate social tagging system was used, through both event logs (click level analysis) and interviews. We observed three types of search activities: community browsing, personal search, and explicit search. Community browsing was the most frequently used, and confirms the value of the social aspects of the system. We conclude that social bookmarking services support various kinds of exploratory search, and provide better personal bookmark management and enhance social navigation.},
}

@article{jiang2013understanding,
  author        = {Jiang, Jing and Wilson, Christo and Wang, Xiao and Sha, Wenpeng and Huang, Peng and Dai, Yafei and Zhao, Ben Y.},
  title         = {Understanding Latent Interactions in Online Social Networks},
  journal       = TWEB,
  volume        = 7,
  number        = 4,
  pages         = {18:1--18:39},
  articleno     = {18},
  numpages      = {39},
  month         = nov,
  year          = 2013,
  issue_date    = {October 2013},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  issn          = {1559-1131},
  doi           = {10.1145/2517040},
  url           = {http://doi.acm.org/10.1145/2517040},
  acmid         = {2517040},
  interhash     = {af18171c38a0b07fce62fb3fac5c6322},
  intrahash     = {aa9695f56135fd58de32b5b4a4c73698},
  abstract      = {Popular online social networks (OSNs) like Facebook and Twitter are changing the way users communicate and interact with the Internet. A deep understanding of user interactions in OSNs can provide important insights into questions of human social behavior and into the design of social platforms and applications. However, recent studies have shown that a majority of user interactions on OSNs are latent interactions, that is, passive actions, such as profile browsing, that cannot be observed by traditional measurement techniques. In this article, we seek a deeper understanding of both active and latent user interactions in OSNs. For quantifiable data on latent user interactions, we perform a detailed measurement study on Renren, the largest OSN in China with more than 220 million users to date. All friendship links in Renren are public, allowing us to exhaustively crawl a connected graph component of 42 million users and 1.66 billion social links in 2009. Renren also keeps detailed, publicly viewable visitor logs for each user profile. We capture detailed histories of profile visits over a period of 90 days for users in the Peking University Renren network and use statistics of profile visits to study issues of user profile popularity, reciprocity of profile visits, and the impact of content updates on user popularity. We find that latent interactions are much more prevalent and frequent than active events, are nonreciprocal in nature, and that profile popularity is correlated with page views of content rather than with quantity of content updates. Finally, we construct latent interaction graphs as models of user browsing behavior and compare their structural properties, evolution, community structure, and mixing times against those of both active interaction graphs and social graphs.},
}

@inproceedings{benevenuto2009characterizing,
  author        = {Benevenuto, Fabr{\'\i}cio and Rodrigues, Tiago and Cha, Meeyoung and Almeida, Virg{\'\i}lio},
  title         = {Characterizing User Behavior in Online Social Networks},
  booktitle     = {Proceedings of the 9th ACM SIGCOMM Conference on Internet Measurement Conference},
  series        = {IMC '09},
  pages         = {49--62},
  numpages      = {14},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  location      = {Chicago, Illinois, USA},
  year          = 2009,
  isbn          = {978-1-60558-771-4},
  doi           = {10.1145/1644893.1644900},
  url           = {http://doi.acm.org/10.1145/1644893.1644900},
  acmid         = {1644900},
  interhash     = {ed9b10d4f36f90ddde9b95ce45b0b0be},
  intrahash     = {e5e25244e1ca2316a7871727e2df2bb9},
  abstract      = {Understanding how users behave when they connect to social networking sites creates opportunities for better interface design, richer studies of social interactions, and improved design of content distribution systems. In this paper, we present a first of a kind analysis of user workloads in online social networks. Our study is based on detailed clickstream data, collected over a 12-day period, summarizing HTTP sessions of 37,024 users who accessed four popular social networks: Orkut, MySpace, Hi5, and LinkedIn. The data were collected from a social network aggregator website in Brazil, which enables users to connect to multiple social networks with a single authentication. Our analysis of the clickstream data reveals key features of the social network workloads, such as how frequently people connect to social networks and for how long, as well as the types and sequences of activities that users conduct on these sites. Additionally, we crawled the social network topology of Orkut, so that we could analyze user interaction data in light of the social graph. Our data analysis suggests insights into how users interact with friends in Orkut, such as how frequently users visit their friends' or non-immediate friends' pages. In summary, our analysis demonstrates the power of using clickstream data in identifying patterns in social network workloads and social interactions. Our analysis shows that browsing, which cannot be inferred from crawling publicly available data, accounts for 92\% of all user activities. Consequently, compared to using only crawled data, considering silent interactions like browsing friends' pages increases the measured level of interaction among users.},
}

% This entry was present twice under the same key; one copy is kept.
@inproceedings{doerfel2014social,
  author        = {Doerfel, Stephan and Zoller, Daniel and Singer, Philipp and Niebler, Thomas and Hotho, Andreas and Strohmaier, Markus},
  title         = {How Social is Social Tagging?},
  booktitle     = {Proceedings of the 23rd International World Wide Web Conference},
  series        = {WWW 2014},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = 2014,
  interhash     = {9223d6d728612c8c05a80b5edceeb78b},
  intrahash     = {11fab5468dd4b4e3db662ea5e68df8e0},
}

@article{thomas2014using,
  author        = {Thomas, Paul},
  title         = {Using Interaction Data to Explain Difficulty Navigating Online},
  journal       = TWEB,
  volume        = 8,
  number        = 4,
  pages         = {24:1--24:41},
  articleno     = {24},
  numpages      = {41},
  month         = nov,
  year          = 2014,
  issue_date    = {October 2014},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  issn          = {1559-1131},
  doi           = {10.1145/2656343},
  url           = {http://doi.acm.org/10.1145/2656343},
  acmid         = {2656343},
  interhash     = {b570b16074de8ee1b2db9fcf1061d16b},
  intrahash     = {06035bc4bd2c62d7dba957ad4410d7b3},
  abstract      = {A user's behaviour when browsing a Web site contains clues to that user's experience. It is possible to record some of these behaviours automatically, and extract signals that indicate a user is having trouble finding information. This allows for Web site analytics based on user experiences, not just page impressions. A series of experiments identified user browsing behaviours---such as time taken and amount of scrolling up a page---which predict navigation difficulty and which can be recorded with minimal or no changes to existing sites or browsers. In turn, patterns of page views correlate with these signals and these patterns can help Web authors understand where and why their sites are hard to navigate. A new software tool, ``LATTE,'' automates this analysis and makes it available to Web authors in the context of the site itself.},
}