@incollection{pieper2009wissenschaftliche, abstract = {This contribution examines to what extent documents from the document servers of academic institutions are indexed by the general-purpose search engines Google and Yahoo, and to what degree academic search engines are better suited for finding such documents. To this end, the five search engines BASE, Google Scholar, OAIster, Scientific Commons and Scirus are briefly described and compared with one another. The main focus is on their differing contents, search functions and output options; a retrieval test specifically examines how well the search engines find documents whose full texts are directly accessible without restrictions, in the spirit of Open Access.}, author = {Pieper, Dirk and Wolf, Sebastian}, booktitle = {Handbuch Internet-Suchmaschinen: Nutzerorientierung in Wissenschaft und Praxis}, editor = {Lewandowski, Dirk}, interhash = {b915fb45a9a6dc3499247e76992c7897}, intrahash = {1f997db426731303690c9bb962f1c158}, pages = {356--374}, publisher = {Akademische Verlagsgesellschaft AKA}, title = {Wissenschaftliche Dokumente in Suchmaschinen}, url = {http://eprints.rclis.org/12746/}, year = 2009 } @article{silverstein1999analysis, abstract = {In this paper we present an analysis of an AltaVista Search Engine query log consisting of approximately 1 billion entries for search requests over a period of six weeks. This represents almost 285 million user sessions, each an attempt to fill a single information need. We present an analysis of individual queries, query duplication, and query sessions. We also present results of a correlation analysis of the log entries, studying the interaction of terms within queries. Our data supports the conjecture that web users differ significantly from the user assumed in the standard information retrieval literature. Specifically, we show that web users type in short queries, mostly look at the first 10 results only, and seldom modify the query. This suggests that traditional information retrieval techniques may not work well for answering web search requests. The correlation analysis showed that the most highly correlated items are constituents of phrases. 
This result indicates that it may be useful for search engines to consider search terms as parts of phrases even if the user did not explicitly specify them as such.}, acmid = {331405}, address = {New York, NY, USA}, author = {Silverstein, Craig and Marais, Hannes and Henzinger, Monika and Moricz, Michael}, doi = {10.1145/331403.331405}, interhash = {5e26846be504d4fc6b6a7b236c1c023a}, intrahash = {4ac734beeccbcb3a05786e8ca57f5629}, issn = {0163-5840}, issue_date = {Fall 1999}, journal = {SIGIR Forum}, month = sep, number = 1, numpages = {7}, pages = {6--12}, publisher = {ACM}, title = {Analysis of a very large web search engine query log}, url = {http://doi.acm.org/10.1145/331403.331405}, volume = 33, year = 1999 } @inproceedings{mitzlaff2012namelings, author = {Mitzlaff, Folke and Stumme, Gerd}, booktitle = {SocInfo}, editor = {Aberer, Karl and Flache, Andreas and Jager, Wander and Liu, Ling and Tang, Jie and Guéret, Christophe}, ee = {http://dx.doi.org/10.1007/978-3-642-35386-4_39}, interhash = {2f803cd9938df1f11229f9180577a341}, intrahash = {e2770a8535fca7cce582148703d8980a}, isbn = {978-3-642-35385-7}, pages = {531--534}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Namelings -- Discover Given Name Relatedness Based on Data from the Social Web}, url = {http://www.kde.cs.uni-kassel.de/pub/pdf/mitzlaff2012namelings.pdf}, volume = 7710, year = 2012 } @inproceedings{mitzlaff2012ranking, author = {Mitzlaff, Folke and Stumme, Gerd}, booktitle = {Proceedings of the 1st ASE International Conference on Social Informatics}, editor = {Marathe, Madhav and Contractor, Noshir}, interhash = {339a7285bfb35e6f3eb1f22f98e818a3}, intrahash = {c2f599000eaa568ed4d1b0b9d3f6fadd}, pages = {185--191}, publisher = {IEEE Computer Society}, title = {Ranking Given Names}, year = 2012 } @book{croft2010search, address = {Boston}, author = {Croft, W. Bruce and Metzler, Donald and Strohman, Trevor}, edition = {1st}, interhash = {bc2cb2c872ddae363967b53064670cd8}, intrahash = {6dbe8ff9de4f8b16c442247baf8abe73}, isbn = {9780136072249}, month = feb, publisher = {Addison-Wesley}, refid = {268788295}, title = {Search engines: information retrieval in practice}, url = {http://www.amazon.com/Search-Engines-Information-Retrieval-Practice/dp/0136072240}, year = 2010 } @inproceedings{liu2011browsing, abstract = {To optimize the performance of web crawlers, various page importance measures have been studied to select and order URLs in crawling. Most sophisticated measures (e.g. breadth-first and PageRank) are based on link structure. In this paper, we treat the problem from another perspective and propose to measure page importance through mining user interest and behaviors from web browse logs. Unlike most existing approaches, which work on a single URL, in this paper both the log mining and the crawl ordering are performed at the granularity of URL patterns. The proposed URL pattern-based crawl orderings are capable of properly predicting the importance of newly created (unseen) URLs. 
Promising experimental results demonstrate the feasibility of our approach.}, acmid = {2063593}, address = {New York, NY, USA}, author = {Liu, Minghai and Cai, Rui and Zhang, Ming and Zhang, Lei}, booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management}, doi = {10.1145/2063576.2063593}, interhash = {7b45567cb6a492d8354dc32401549291}, intrahash = {3ce89bd8a3d3eb6306b739fe1f4088df}, isbn = {978-1-4503-0717-8}, location = {Glasgow, Scotland, UK}, numpages = {6}, pages = {87--92}, publisher = {ACM}, title = {User browsing behavior-driven web crawling}, url = {http://doi.acm.org/10.1145/2063576.2063593}, year = 2011 } @inproceedings{bahmani2012pagerank, abstract = {One of the most important features of the Web graph and social networks is that they are constantly evolving. The classical computational paradigm, which assumes a fixed data set as an input to an algorithm that terminates, is inadequate for such settings. In this paper we study the problem of computing PageRank on an evolving graph. We propose an algorithm that, at any moment in time and by crawling a small portion of the graph, provides an estimate of the PageRank that is close to the true PageRank of the graph at that moment. We also evaluate our algorithm experimentally on real data sets and on randomly generated inputs. Under a stylized model of graph evolution, we show that our algorithm achieves a provable performance guarantee that is significantly better than the naive algorithm that crawls the nodes in a round-robin fashion.}, acmid = {2339539}, address = {New York, NY, USA}, author = {Bahmani, Bahman and Kumar, Ravi and Mahdian, Mohammad and Upfal, Eli}, booktitle = {Proceedings of the 18th ACM SIGKDD international conference on Knowledge discovery and data mining}, doi = {10.1145/2339530.2339539}, interhash = {4572c8d52b91bf4487183b6c3b900827}, intrahash = {6058356e9c5a62b3993686ff5eac9529}, isbn = {978-1-4503-1462-6}, location = {Beijing, China}, numpages = {9}, pages = {24--32}, publisher = {ACM}, title = {PageRank on an evolving graph}, url = {http://doi.acm.org/10.1145/2339530.2339539}, year = 2012 } @inproceedings{cho2007rankmass, abstract = {Crawling algorithms have been the subject of extensive research and optimizations, but some important questions remain open. In particular, given the unbounded number of pages available on the Web, search-engine operators constantly struggle with the following vexing questions: When can I stop downloading the Web? How many pages should I download to cover "most" of the Web? How can I know I am not missing an important part when I stop? In this paper we provide an answer to these questions by developing, in the context of a system that is given a set of trusted pages, a family of crawling algorithms that (1) provide a theoretical guarantee on how much of the "important" part of the Web it will download after crawling a certain number of pages and (2) give a high priority to important pages during a crawl, so that the search engine can index the most important part of the Web first. We prove the correctness of our algorithms by theoretical analysis and evaluate their performance experimentally based on 141 million URLs obtained from the Web. 
Our experiments demonstrate that even our simple algorithm is effective in downloading important pages early on and provides high "coverage" of the Web with a relatively small number of pages.}, acmid = {1325897}, author = {Cho, Junghoo and Schonfeld, Uri}, booktitle = {Proceedings of the 33rd international conference on Very large data bases}, interhash = {c5573f70e067624e3a559996172a45ef}, intrahash = {3227ef077a463fbaa6ba1ac7aac82d06}, isbn = {978-1-59593-649-3}, location = {Vienna, Austria}, numpages = {12}, pages = {375--386}, publisher = {VLDB Endowment}, title = {RankMass crawler: a crawler with high personalized pagerank coverage guarantee}, url = {http://dl.acm.org/citation.cfm?id=1325851.1325897}, year = 2007 } @inproceedings{olston2008recrawl, abstract = {It is crucial for a web crawler to distinguish between ephemeral and persistent content. Ephemeral content (e.g., quote of the day) is usually not worth crawling, because by the time it reaches the index it is no longer representative of the web page from which it was acquired. On the other hand, content that persists across multiple page updates (e.g., recent blog postings) may be worth acquiring, because it matches the page's true content for a sustained period of time.

In this paper we characterize the longevity of information found on the web, via both empirical measurements and a generative model that coincides with these measurements. We then develop new recrawl scheduling policies that take longevity into account. As we show via experiments over real web data, our policies obtain better freshness at lower cost, compared with previous approaches.}, acmid = {1367557}, address = {New York, NY, USA}, author = {Olston, Christopher and Pandey, Sandeep}, booktitle = {Proceedings of the 17th international conference on World Wide Web}, doi = {10.1145/1367497.1367557}, interhash = {62dabc7c7aa03203804fde1b32b5fbe0}, intrahash = {68ecda3b2d943f8625add57a3a2f3a7c}, isbn = {978-1-60558-085-2}, location = {Beijing, China}, numpages = {10}, pages = {437--446}, publisher = {ACM}, title = {Recrawl scheduling based on information longevity}, url = {http://doi.acm.org/10.1145/1367497.1367557}, year = 2008 } @inproceedings{pandey2005usercentric, abstract = {Search engines are the primary gateways of information access on the Web today. Behind the scenes, search engines crawl the Web to populate a local indexed repository of Web pages, used to answer user search queries. In an aggregate sense, the Web is very dynamic, causing any repository of Web pages to become out of date over time, which in turn causes query answer quality to degrade. Given the considerable size, dynamicity, and degree of autonomy of the Web as a whole, it is not feasible for a search engine to maintain its repository exactly synchronized with the Web. In this paper we study how to schedule Web pages for selective (re)downloading into a search engine repository. The scheduling objective is to maximize the quality of the user experience for those who query the search engine. We begin with a quantitative characterization of the way in which the discrepancy between the content of the repository and the current content of the live Web impacts the quality of the user experience. This characterization leads to a user-centric metric of the quality of a search engine's local repository. We use this metric to derive a policy for scheduling Web page (re)downloading that is driven by search engine usage and free of exterior tuning parameters. We then focus on the important subproblem of scheduling the refreshing of Web pages already present in the repository, and show how to compute the priorities efficiently. We provide extensive empirical comparisons of our user-centric method against prior Web page refresh strategies, using real Web data. Our results demonstrate that our method requires far fewer resources to maintain the same search engine quality level for users, leaving substantially more resources available for incorporating new Web pages into the search repository.}, acmid = {1060805}, address = {New York, NY, USA}, author = {Pandey, Sandeep and Olston, Christopher}, booktitle = {Proceedings of the 14th international conference on World Wide Web}, doi = {10.1145/1060745.1060805}, interhash = {4d0e8067c9240b05c42bf8e174ffb1d1}, intrahash = {166a0a9f8d80beeab0c75961398d951f}, isbn = {1-59593-046-9}, location = {Chiba, Japan}, numpages = {11}, pages = {401--411}, publisher = {ACM}, title = {User-centric Web crawling}, url = {http://doi.acm.org/10.1145/1060745.1060805}, year = 2005 } @phdthesis{castillo2004effective, abstract = {The key factors for the success of the World Wide Web are its large size and the lack of a centralized control over its contents. 
Both issues are also the most important source of problems for locating information. The Web is a context in which traditional Information Retrieval methods are challenged, and given the volume of the Web and its speed of change, the coverage of modern search engines is relatively small. Moreover, the distribution of quality is very skewed, and interesting pages are scarce in comparison with the rest of the content. Web crawling is the process used by search engines to collect pages from the Web. This thesis studies Web crawling at several different levels, ranging from the long-term goal of crawling important pages first, to the short-term goal of using the network connectivity efficiently, including implementation issues that are essential for crawling in practice. We start by designing a new model and architecture for a Web crawler that tightly integrates the crawler with the rest of the search engine, providing access to the metadata and links of the documents that can be used to guide the crawling process effectively. We implement this design in the WIRE project as an efficient Web crawler that provides an experimental framework for this research. In fact, we have used our crawler to characterize the Chilean Web, using the results as feedback to improve the crawler design. We argue that the number of pages on the Web can be considered infinite, and given that a Web crawler cannot download all the pages, it is important to capture the most important ones as early as possible during the crawling process. We propose, study, and implement algorithms for achieving this goal, showing that we can crawl 50% of a large Web collection and capture 80% of its total PageRank value in both simulated and real Web environments. We also model and study user browsing behavior in Web sites, concluding that it is not necessary to go deeper than five levels from the home page to capture most of the pages actually visited by people, and support this conclusion with log analysis of several Web sites. We also propose several mechanisms for server cooperation to reduce network traffic and improve the representation of a Web page in a search engine with the help of Web site managers.}, address = {Santiago, Chile}, author = {Castillo, Carlos}, institution = {University of Chile}, interhash = {36eac63e7cfae05bc7444171432a6f3f}, intrahash = {38b52bf7ccc2e1221477f5d8937c3b7d}, month = nov, school = {School of Engineering}, title = {Effective Web Crawling}, url = {http://www.chato.cl/crawling/}, year = 2004 } @inproceedings{horowitz2010anatomy, abstract = {We present Aardvark, a social search engine. With Aardvark, users ask a question, either by instant message, email, web input, text message, or voice. Aardvark then routes the question to the person in the user's extended social network most likely to be able to answer that question. As compared to a traditional web search engine, where the challenge lies in finding the right document to satisfy a user's information need, the challenge in a social search engine like Aardvark lies in finding the right person to satisfy a user's information need. Further, while trust in a traditional search engine is based on authority, in a social search engine like Aardvark, trust is based on intimacy. 
We describe how these considerations inform the architecture, algorithms, and user interface of Aardvark, and how they are reflected in the behavior of Aardvark users.}, acmid = {1772735}, address = {New York, NY, USA}, author = {Horowitz, Damon and Kamvar, Sepandar D.}, booktitle = {Proceedings of the 19th international conference on World wide web}, doi = {10.1145/1772690.1772735}, interhash = {418d79b49ede3a8d15ef5eb8453094f0}, intrahash = {787ecbd5796ada03f15bdda85497e1fd}, isbn = {978-1-60558-799-8}, location = {Raleigh, North Carolina, USA}, numpages = {10}, pages = {431--440}, publisher = {ACM}, title = {The anatomy of a large-scale social search engine}, url = {http://doi.acm.org/10.1145/1772690.1772735}, year = 2010 } @inproceedings{jaeschke2008logsonomy, abstract = {In social bookmarking systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Search engines filter the vast information of the web. Queries describe a user’s information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. The clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. This poster analyzes the topological characteristics of the resulting tripartite hypergraph of queries, users and bookmarks of two query logs and compares it to a snapshot of the folksonomy del.icio.us.}, address = {Menlo Park, CA, USA}, author = {Jäschke, Robert and Krause, Beate and Hotho, Andreas and Stumme, Gerd}, booktitle = {Proceedings of the Second International Conference on Weblogs and Social Media (ICWSM 2008)}, interhash = {13ec3f45fc7e0364cdc6b9a7c12c5c2c}, intrahash = {7eb26a177187ea8cf788cc897d66ee48}, isbn = {978-1-57735-355-3}, pages = {192--193}, publisher = {AAAI Press}, title = {Logsonomy -- A Search Engine Folksonomy}, url = {http://www.kde.cs.uni-kassel.de/hotho/pub/2008/Krause2008logsonomy_short.pdf}, vgwort = {7}, year = 2008 } @inproceedings{conf/cikm/DingFJPCPRDS04, author = {Ding, Li and Finin, Timothy W. and Joshi, Anupam and Pan, Rong and Cost, R. Scott and Peng, Yun and Reddivari, Pavan and Doshi, Vishal and Sachs, Joel}, booktitle = {CIKM}, ee = {http://doi.acm.org/10.1145/1031289}, interhash = {17031c743c455bf9de56e142d727dbb7}, intrahash = {ab5c85d78daba236ca1bb5ad49865ee5}, pages = {652--659}, title = {Swoogle: a search and metadata engine for the semantic web}, url = {http://dblp.uni-trier.de/db/conf/cikm/cikm2004.html#DingFJPCPRDS04}, year = 2004 } 
@inproceedings{krause2008logsonomy, abstract = {Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today’s search engines represent the gateway to retrieve information from the World Wide Web. Short queries, typically consisting of two to three words, describe a user’s information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy, is very similar to that of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and on query logs of two large search engines. All three datasets show small-world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmarking systems, is reflected in the distribution of single query words in search engines. We conclude that the clicking behavior of search engine users based on the displayed search results and the tagging behavior of social bookmarking users are driven by similar dynamics.}, address = {New York, NY, USA}, author = {Krause, Beate and Jäschke, Robert and Hotho, Andreas and Stumme, Gerd}, booktitle = {HT '08: Proceedings of the Nineteenth ACM Conference on Hypertext and Hypermedia}, doi = {10.1145/1379092.1379123}, interhash = {6d34ea1823d95b9dbf37d4db4d125d2a}, intrahash = {e64d14f3207766f4afc65983fa759ffe}, isbn = {978-1-59593-985-2}, location = {Pittsburgh, PA, USA}, pages = {157--166}, publisher = {ACM}, title = {Logsonomy -- Social Information Retrieval with Logdata}, url = {http://www.kde.cs.uni-kassel.de/pub/pdf/krause2008logsonomy.pdf}, vgwort = {17}, year = 2008 }