Lorince, J.; Joseph, K. & Todd, P.
(2015):
Analysis of Music Tagging and Listening Patterns: Do Tags Really Function as Retrieval Aids?
In: Social Computing, Behavioral-Cultural Modeling, and Prediction.
9021. Aufl./Vol..
Hrsg./Editors: Agarwal, N.; Xu, K. & Osgood, N.
Verlag/Publisher: Springer International Publishing,
Erscheinungsjahr/Year: 2015.
Seiten/Pages: 141-152.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
In collaborative tagging systems, it is generally assumed that users assign tags to facilitate retrieval of content at a later time. There is, however, little behavioral evidence that tags actually serve this purpose. Using a large-scale dataset from the social music website Last.fm, we explore how patterns of music tagging and subsequent listening interact to determine if there exist measurable signals of tags functioning as retrieval aids. Specifically, we describe our methods for testing if the assignment of a tag tends to lead to an increase in listening behavior. Results suggest that tagging, on average, leads to only very small increases in listening rates, and overall the data do
@incollection{lorince2015analysis,
  author        = {Lorince, Jared and Joseph, Kenneth and Todd, Peter M.},
  title         = {Analysis of Music Tagging and Listening Patterns: Do Tags Really Function as Retrieval Aids?},
  editor        = {Agarwal, Nitin and Xu, Kevin and Osgood, Nathaniel},
  booktitle     = {Social Computing, Behavioral-Cultural Modeling, and Prediction},
  series        = {Lecture Notes in Computer Science},
  volume        = {9021},
  publisher     = {Springer International Publishing},
  year          = {2015},
  pages         = {141--152},
  doi           = {10.1007/978-3-319-16268-3_15},
  url           = {http://dx.doi.org/10.1007/978-3-319-16268-3_15},
  isbn          = {978-3-319-16267-6},
  keywords      = {folksonomy, last.fm, retrieval, tagging, usage},
  internal-note = {abstract is truncated in the source export ("...the data do") -- TODO restore full text from the publisher page},
  abstract      = {In collaborative tagging systems, it is generally assumed that users assign tags to facilitate retrieval of content at a later time. There is, however, little behavioral evidence that tags actually serve this purpose. Using a large-scale dataset from the social music website Last.fm, we explore how patterns of music tagging and subsequent listening interact to determine if there exist measurable signals of tags functioning as retrieval aids. Specifically, we describe our methods for testing if the assignment of a tag tends to lead to an increase in listening behavior. Results suggest that tagging, on average, leads to only very small increases in listening rates, and overall the data do}
}
%0 = incollection
%A = Lorince, Jared and Joseph, Kenneth and Todd, PeterM.
%B = Social Computing, Behavioral-Cultural Modeling, and Prediction
%D = 2015
%I = Springer International Publishing
%T = Analysis of Music Tagging and Listening Patterns: Do Tags Really Function as Retrieval Aids?
%U = http://dx.doi.org/10.1007/978-3-319-16268-3_15
Jäschke, R. & Rudolph, S.
(2013):
Attribute Exploration on the Web.
In: Contributions to the 11th International Conference on Formal Concept Analysis,
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
We propose an approach for supporting attribute exploration by web information retrieval, in particular by posing appropriate queries to search engines, crowd sourcing systems, and the linked open data cloud. We discuss underlying general assumptions for this to work and the degree to which these can be taken for granted.
@inproceedings{jaschke2013attribute,
  author    = {J{\"a}schke, Robert and Rudolph, Sebastian},
  title     = {Attribute Exploration on the {Web}},
  editor    = {Cellier, Peggy and Distel, Felix and Ganter, Bernhard},
  booktitle = {Contributions to the 11th International Conference on Formal Concept Analysis},
  year      = {2013},
  pages     = {19--34},
  url       = {http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-113133},
  keywords  = {2013, acquisition, analysis, attribute, computing, concept, crowdsourcing, data, exploration, fca, formal, human, information, ir, iteg, knowledge, l3s, linked, lod, open, retrieval, search, sparql, web},
  abstract  = {We propose an approach for supporting attribute exploration by web information retrieval, in particular by posing appropriate queries to search engines, crowd sourcing systems, and the linked open data cloud. We discuss underlying general assumptions for this to work and the degree to which these can be taken for granted.}
}
%0 = inproceedings
%A = Jäschke, Robert and Rudolph, Sebastian
%B = Contributions to the 11th International Conference on Formal Concept Analysis
%D = 2013
%T = Attribute Exploration on the Web
%U = http://nbn-resolving.de/urn:nbn:de:bsz:14-qucosa-113133
Gomes, D.; Cruz, D.; Miranda, J.; Costa, M. & Fontes, S.
(2012):
Creating a searchable web archive. Portugal
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
The web became a mass means of publication that has been replacing printed media. However, its information is extremely ephemeral. Currently, most of the information available on the web is less than 1 year old. There are several initiatives worldwide that struggle to archive information from the web before it vanishes. However, search mechanisms to access this information are still limited and do not satisfy their users that demand performance similar to live-web search engines. This paper presents some of the work developed to create an efficient and effective searchable web archive service, from data acquisition to user interface design. The results of research were applied in practice to create the Portuguese Web Archive that is publicly available since January 2010. It supports full-text search over 1 billion contents archived from 1996 to 2010. The developed software is available as an open source project.
@techreport{gomes2012creating,
  author        = {Gomes, Daniel and Cruz, David and Miranda, Jo{\~a}o and Costa, Miguel and Fontes, Sim{\~a}o},
  title         = {Creating a Searchable Web Archive},
  address       = {Portugal},
  year          = {2012},
  url           = {http://web.ist.utl.pt/joaocarvalhomiranda/docs/other/creating-a-searchable-web-archive-relatorio.pdf},
  keywords      = {archive, information, ir, retrieval, search, web},
  internal-note = {required institution field is missing (techreport) -- TODO confirm issuing institution; URL suggests IST Lisbon hosting but not the publisher},
  abstract      = {The web became a mass means of publication that has been replacing printed media. However, its information is extremely ephemeral. Currently, most of the information available on the web is less than 1 year old. There are several initiatives worldwide that struggle to archive information from the web before it vanishes. However, search mechanisms to access this information are still limited and do not satisfy their users that demand performance similar to live-web search engines. This paper presents some of the work developed to create an efficient and effective searchable web archive service, from data acquisition to user interface design. The results of research were applied in practice to create the Portuguese Web Archive that is publicly available since January 2010. It supports full-text search over 1 billion contents archived from 1996 to 2010. The developed software is available as an open source project.}
}
%0 = techreport
%A = Gomes, Daniel and Cruz, David and Miranda, João and Costa, Miguel and Fontes, Simão
%C = Portugal
%D = 2012
%T = Creating a searchable web archive
%U = http://web.ist.utl.pt/joaocarvalhomiranda/docs/other/creating-a-searchable-web-archive-relatorio.pdf
Poelmans, J.; Elzinga, P.; Viaene, S.; Dedene, G. & Kuznetsov, S. O.
(2011):
Text Mining Scientific Papers: a Survey on FCA-based Information Retrieval Research.
In: Industrial Conference on Data Mining - Poster and Industry Proceedings,
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
Formal Concept Analysis (FCA) is an unsupervised clustering technique and many scientific papers are devoted to applying FCA in Information Retrieval (IR) research. We collected 103 papers published between 2003-2009 which mention FCA and information retrieval in the abstract, title or keywords. Using a prototype of our FCA-based toolset CORDIET, we converted the pdf-files containing the papers to plain text, indexed them with Lucene using a thesaurus containing terms related to FCA research and then created the concept lattice shown in this paper. We visualized, analyzed and explored the literature with concept lattices and discovered multiple interesting research streams in IR of which we give an extensive overview. The core contributions of this paper are the innovative application of FCA to the text mining of scientific papers and the survey of the FCA-based IR research.
@inproceedings{poelmans2011mining,
  author    = {Poelmans, Jonas and Elzinga, Paul and Viaene, Stijn and Dedene, Guido and Kuznetsov, Sergei O.},
  title     = {Text Mining Scientific Papers: a Survey on {FCA}-based Information Retrieval Research},
  editor    = {Perner, Petra},
  booktitle = {Industrial Conference on Data Mining - Poster and Industry Proceedings},
  publisher = {IBaI Publishing},
  year      = {2011},
  pages     = {82--96},
  url       = {http://dblp.uni-trier.de/db/conf/incdm/incdm2011p.html#PoelmansEVDK11},
  isbn      = {978-3-942954-06-4},
  keywords  = {analysis, concept, fca, formal, information, ir, retrieval},
  abstract  = {Formal Concept Analysis (FCA) is an unsupervised clustering technique and many scientific papers are devoted to applying FCA in Information Retrieval (IR) research. We collected 103 papers published between 2003-2009 which mention FCA and information retrieval in the abstract, title or keywords. Using a prototype of our FCA-based toolset CORDIET, we converted the pdf-files containing the papers to plain text, indexed them with Lucene using a thesaurus containing terms related to FCA research and then created the concept lattice shown in this paper. We visualized, analyzed and explored the literature with concept lattices and discovered multiple interesting research streams in IR of which we give an extensive overview. The core contributions of this paper are the innovative application of FCA to the text mining of scientific papers and the survey of the FCA-based IR research.}
}
%0 = inproceedings
%A = Poelmans, Jonas and Elzinga, Paul and Viaene, Stijn and Dedene, Guido and Kuznetsov, Sergei O.
%B = Industrial Conference on Data Mining - Poster and Industry Proceedings
%D = 2011
%I = IBaI Publishing
%T = Text Mining Scientific Papers: a Survey on FCA-based Information Retrieval Research.
%U = http://dblp.uni-trier.de/db/conf/incdm/incdm2011p.html#PoelmansEVDK11
Croft, W. B.; Metzler, D. & Strohman, T. (Hrsg.)
(2010):
Search engines: information retrieval in practice.
1st. Aufl./Vol..
Erscheinungsjahr/Year: 2010.
Verlag/Publisher: Addison-Wesley,
Boston.
[Volltext] [BibTeX]
[Endnote]
@book{croft2010search,
  author    = {Croft, W. Bruce and Metzler, Donald and Strohman, Trevor},
  title     = {Search Engines: Information Retrieval in Practice},
  publisher = {Addison-Wesley},
  address   = {Boston},
  year      = {2010},
  edition   = {First},
  url       = {http://www.amazon.com/Search-Engines-Information-Retrieval-Practice/dp/0136072240},
  isbn      = {978-0-13-607224-9},
  keywords  = {engine, information, practice, retrieval, search}
}
%0 = book
%A = Croft, W. Bruce and Metzler, Donald and Strohman, Trevor
%C = Boston
%D = 2010
%I = Addison-Wesley
%T = Search engines: information retrieval in practice
%U = http://www.amazon.com/Search-Engines-Information-Retrieval-Practice/dp/0136072240
Dong, X.; Chen, X.; Guan, Y.; Yu, Z. & Li, S.
(2009):
An Overview of Learning to Rank for Information Retrieval.
In: CSIE (3),
[Volltext]
[BibTeX][Endnote]
@inproceedings{dong2009overview,
  author    = {Dong, Xishuang and Chen, Xiaodong and Guan, Yi and Yu, Zhiming and Li, Sheng},
  title     = {An Overview of Learning to Rank for Information Retrieval},
  editor    = {Burgin, Mark and Chowdhury, Masud H. and Ham, Chan H. and Ludwig, Simone A. and Su, Weilian and Yenduri, Sumanth},
  booktitle = {CSIE (3)},
  publisher = {IEEE Computer Society},
  year      = {2009},
  pages     = {600--606},
  url       = {http://dblp.uni-trier.de/db/conf/csie/csie2009-3.html#DongCGYL09},
  isbn      = {978-0-7695-3507-4},
  keywords  = {information, learning, learning-to-rank, overview, rank, retrieval}
}
%0 = inproceedings
%A = Dong, Xishuang and Chen, Xiaodong and Guan, Yi and Yu, Zhiming and Li, Sheng
%B = CSIE (3)
%D = 2009
%I = IEEE Computer Society
%T = An Overview of Learning to Rank for Information Retrieval.
%U = http://dblp.uni-trier.de/db/conf/csie/csie2009-3.html#DongCGYL09
Ritchie, A.
(2009):
Citation context analysis for information retrieval. Cambridge, UK
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
This thesis investigates taking words from around citations to scientific papers in order to create an enhanced document representation for improved information retrieval. This method parallels how anchor text is commonly used in Web retrieval. In previous work, words from citing documents have been used as an alternative representation of the cited document but no previous experiment has combined them with a full-text document representation and measured effectiveness in a large scale evaluation. The contributions of this thesis are twofold: firstly, we present a novel document representation, along with experiments to measure its effect on retrieval effectiveness, and, secondly, we document the construction of a new, realistic test collection of scientific research papers, with references (in the bibliography) and their associated citations (in the running text of the paper) automatically annotated. Our experiments show that the citation-enhanced document representation increases retrieval effectiveness across a range of standard retrieval models and evaluation measures. In Chapter 2, we give the background to our work, discussing the various areas from which we draw together ideas: information retrieval, particularly link structure analysis and anchor text indexing, and bibliometrics, in particular citation analysis. We show that there is a close relatedness of ideas between these areas but that these ideas have not been fully explored experimentally. Chapter 3 discusses the test collection paradigm for evaluation of information retrieval systems and describes how and why we built our test collection. In Chapter 4, we introduce the ACL Anthology, the archive of computational linguistics papers that our test collection is centred around. The archive contains the most prominent publications since the beginning of the field in the early 1960s, consisting of one journal plus conferences and workshops, resulting in over 10,000 papers. 
Chapter 5 describes how the PDF papers are prepared for our experiments, including identification of references and citations in the papers, once converted to plain text, and extraction of citation information to an XML database. Chapter 6 presents our experiments: we show that adding citation terms to the full-text of the papers improves retrieval effectiveness by up to 7.4%, that weighting citation terms higher relative to paper terms increases the improvement and that varying the context from which citation terms are taken has a significant effect on retrieval effectiveness. Our main hypothesis that citation terms enhance a full-text representation of scientific papers is thus proven. There are some limitations to these experiments. The relevance judgements in our test collection are incomplete but we have experimentally verified that the test collection is, nevertheless, a useful evaluation tool. Using the Lemur toolkit constrained the method that we used to weight citation terms; we would like to experiment with a more realistic implementation of term weighting. Our experiments with different citation contexts did not conclude an optimal citation context; we would like to extend the scope of our investigation. Now that our test collection exists, we can address these issues in our experiments and leave the door open for more extensive experimentation.
@techreport{ritchie2009citation,
  author      = {Ritchie, Anna},
  title       = {Citation Context Analysis for Information Retrieval},
  institution = {University of Cambridge, Computer Laboratory},
  type        = {Technical Report},
  number      = {744},
  address     = {Cambridge, UK},
  year        = {2009},
  url         = {https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-744.pdf},
  issn        = {1476-2986},
  keywords    = {citation, context, information, ir, retrieval, scientometrics},
  abstract    = {This thesis investigates taking words from around citations to scientific papers in order to create an enhanced document representation for improved information retrieval. This method parallels how anchor text is commonly used in Web retrieval. In previous work, words from citing documents have been used as an alternative representation of the cited document but no previous experiment has combined them with a full-text document representation and measured effectiveness in a large scale evaluation. The contributions of this thesis are twofold: firstly, we present a novel document representation, along with experiments to measure its effect on retrieval effectiveness, and, secondly, we document the construction of a new, realistic test collection of scientific research papers, with references (in the bibliography) and their associated citations (in the running text of the paper) automatically annotated. Our experiments show that the citation-enhanced document representation increases retrieval effectiveness across a range of standard retrieval models and evaluation measures. In Chapter 2, we give the background to our work, discussing the various areas from which we draw together ideas: information retrieval, particularly link structure analysis and anchor text indexing, and bibliometrics, in particular citation analysis. We show that there is a close relatedness of ideas between these areas but that these ideas have not been fully explored experimentally. Chapter 3 discusses the test collection paradigm for evaluation of information retrieval systems and describes how and why we built our test collection. In Chapter 4, we introduce the ACL Anthology, the archive of computational linguistics papers that our test collection is centred around. The archive contains the most prominent publications since the beginning of the field in the early 1960s, consisting of one journal plus conferences and workshops, resulting in over 10,000 papers. Chapter 5 describes how the PDF papers are prepared for our experiments, including identification of references and citations in the papers, once converted to plain text, and extraction of citation information to an XML database. Chapter 6 presents our experiments: we show that adding citation terms to the full-text of the papers improves retrieval effectiveness by up to 7.4\%, that weighting citation terms higher relative to paper terms increases the improvement and that varying the context from which citation terms are taken has a significant effect on retrieval effectiveness. Our main hypothesis that citation terms enhance a full-text representation of scientific papers is thus proven. There are some limitations to these experiments. The relevance judgements in our test collection are incomplete but we have experimentally verified that the test collection is, nevertheless, a useful evaluation tool. Using the Lemur toolkit constrained the method that we used to weight citation terms; we would like to experiment with a more realistic implementation of term weighting. Our experiments with different citation contexts did not conclude an optimal citation context; we would like to extend the scope of our investigation. Now that our test collection exists, we can address these issues in our experiments and leave the door open for more extensive experimentation.}
}
%0 = techreport
%A = Ritchie, Anna
%C = Cambridge, UK
%D = 2009
%T = Citation context analysis for information retrieval
%U = https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-744.pdf
Krause, B.; Jäschke, R.; Hotho, A. & Stumme, G.
(2008):
Logsonomy - Social Information Retrieval with Logdata.
In: HT '08: Proceedings of the nineteenth ACM conference on Hypertext and hypermedia,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and the query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.
@inproceedings{krause2008logsonomy,
  author        = {Krause, Beate and J{\"a}schke, Robert and Hotho, Andreas and Stumme, Gerd},
  title         = {Logsonomy - Social Information Retrieval with Logdata},
  booktitle     = {HT '08: Proceedings of the nineteenth ACM conference on Hypertext and hypermedia},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = {2008},
  pages         = {157--166},
  doi           = {10.1145/1379092.1379123},
  url           = {http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Journals&title=Proceedings%20of%20the%20nineteenth%20ACM%20conference%20on%20Hypertext%20and%20hypermedia},
  isbn          = {978-1-59593-985-2},
  keywords      = {folksonomy, information, logsonomy, retrieval, social},
  internal-note = {abstract reconstructed from a garbled PDF copy (leading characters of each line were lost) -- TODO verify against the published paper},
  abstract      = {Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and the query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.}
}
%0 = inproceedings
%A = Krause, Beate and Jäschke, Robert and Hotho, Andreas and Stumme, Gerd
%B = HT '08: Proceedings of the nineteenth ACM conference on Hypertext and hypermedia
%C = New York, NY, USA
%D = 2008
%I = ACM
%T = Logsonomy - Social Information Retrieval with Logdata
%U = http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Journals&title=Proceedings%20of%20the%20nineteenth%20ACM%20conference%20on%20Hypertext%20and%20hypermedia
Krause, B.; Jäschke, R.; Hotho, A. & Stumme, G.
(2008):
Logsonomy - Social Information Retrieval with Logdata.
In: HT '08: Proceedings of the Nineteenth ACM Conference on Hypertext and Hypermedia,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and the query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.
@inproceedings{krause2008logsonomyb,
  author        = {Krause, Beate and J{\"a}schke, Robert and Hotho, Andreas and Stumme, Gerd},
  title         = {Logsonomy - Social Information Retrieval with Logdata},
  booktitle     = {HT '08: Proceedings of the Nineteenth ACM Conference on Hypertext and Hypermedia},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = {2008},
  pages         = {157--166},
  doi           = {10.1145/1379092.1379123},
  url           = {http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Journals&title=Proceedings%20of%20the%20nineteenth%20ACM%20conference%20on%20Hypertext%20and%20hypermedia},
  isbn          = {978-1-59593-985-2},
  keywords      = {2.0, 2008, analysis, folksonomy, information, itegpub, logsonomy, myown, network, retrieval, search, social, tagorapub, web, web2.0, web20},
  internal-note = {duplicate of krause2008logsonomy (key renamed here to avoid a repeated-entry error); abstract reconstructed from a garbled PDF copy -- TODO deduplicate and verify},
  abstract      = {Social bookmarking systems constitute an established part of the Web 2.0. In such systems users describe bookmarks by keywords called tags. The structure behind these social systems, called folksonomies, can be viewed as a tripartite hypergraph of user, tag and resource nodes. This underlying network shows specific structural properties that explain its growth and the possibility of serendipitous exploration. Today's search engines represent the gateway to retrieve information from the World Wide Web. Short queries typically consisting of two to three words describe a user's information need. In response to the displayed results of the search engine, users click on the links of the result page as they expect the answer to be of relevance. This clickdata can be represented as a folksonomy in which queries are descriptions of clicked URLs. The resulting network structure, which we will term logsonomy is very similar to the one of folksonomies. In order to find out about its properties, we analyze the topological characteristics of the tripartite hypergraph of queries, users and bookmarks on a large snapshot of del.icio.us and the query logs of two large search engines. All of the three datasets show small world properties. The tagging behavior of users, which is explained by preferential attachment of the tags in social bookmark systems, is reflected in the distribution of single query words in search engines. We can conclude that the clicking behaviour of search engine users based on the displayed search results and the tagging behaviour of social bookmarking users is driven by similar dynamics.}
}
%0 = inproceedings
%A = Krause, Beate and Jäschke, Robert and Hotho, Andreas and Stumme, Gerd
%B = HT '08: Proceedings of the Nineteenth ACM Conference on Hypertext and Hypermedia
%C = New York, NY, USA
%D = 2008
%I = ACM
%T = Logsonomy - Social Information Retrieval with Logdata
%U = http://portal.acm.org/citation.cfm?id=1379092.1379123&coll=ACM&dl=ACM&type=series&idx=SERIES399&part=series&WantType=Journals&title=Proceedings%20of%20the%20nineteenth%20ACM%20conference%20on%20Hypertext%20and%20hypermedia
Manning, C. D.; Raghavan, P. & Schütze, H. (Hrsg.)
(2008):
Introduction to Information Retrieval.
Erscheinungsjahr/Year: 2008.
Verlag/Publisher: Cambridge University Press,
[BibTeX]
[Endnote]
@book{manning2008,
  author        = {Manning, Christopher D. and Raghavan, Prabhakar and Sch{\"u}tze, Hinrich},
  title         = {Introduction to Information Retrieval},
  publisher     = {Cambridge University Press},
  year          = {2008},
  keywords      = {information, introduction, ir, retrieval, sota},
  internal-note = {duplicate of manning2008introduction (same work, less metadata) -- TODO merge keys or alias citations}
}
%0 = book
%A = Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich
%D = 2008
%I = Cambridge University Press
%T = Introduction to Information Retrieval
Manning, C. D.; Raghavan, P. & Schütze, H. (Hrsg.)
(2008):
Introduction to Information Retrieval.
Erscheinungsjahr/Year: 2008.
Verlag/Publisher: Cambridge University Press,
New York.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
"Class-tested and coherent, this textbook teaches classical and web information retrieval, including web search and the related areas of text classification and text clustering from basic concepts. It gives an up-to-date treatment of all aspects of the design and implementation of systems for gathering, indexing, and searching documents; methods for evaluating systems; and an introduction to the use of machine learning methods on text collections. All the important ideas are explained using examples and figures, making it perfect for introductory courses in information retrieval for advanced undergraduates and graduate students in computer science. Based on feedback from extensive classroom experience, the book has been carefully structured in order to make teaching more natural and effective. Slides and additional exercises (with solutions for lecturers) are also available through the book's supporting website to help course instructors prepare their lectures." - Publisher's description.
@book{manning2008introduction,
  author    = {Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich},
  title     = {Introduction to Information Retrieval},
  publisher = {Cambridge University Press},
  address   = {New York},
  year      = {2008},
  url       = {http://www.amazon.com/Introduction-Information-Retrieval-Christopher-Manning/dp/0521865719/ref=sr_1_1?ie=UTF8&qid=1337379279&sr=8-1},
  isbn      = {9780521865715},
  keywords  = {book, citedBy:doerfel2012leveraging, information, introduction, ir, retrieval},
  abstract  = {"Class-tested and coherent, this textbook teaches classical and web information retrieval, including web search and the related areas of text classification and text clustering from basic concepts. It gives an up-to-date treatment of all aspects of the design and implementation of systems for gathering, indexing, and searching documents; methods for evaluating systems; and an introduction to the use of machine learning methods on text collections. All the important ideas are explained using examples and figures, making it perfect for introductory courses in information retrieval for advanced undergraduates and graduate students in computer science. Based on feedback from extensive classroom experience, the book has been carefully structured in order to make teaching more natural and effective. Slides and additional exercises (with solutions for lecturers) are also available through the book's supporting website to help course instructors prepare their lectures." -- Publisher's description.},
}
%0 = book
%A = Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich
%C = New York
%D = 2008
%I = Cambridge University Press
%T = Introduction to Information Retrieval
%U = http://www.amazon.com/Introduction-Information-Retrieval-Christopher-Manning/dp/0521865719/ref=sr_1_1?ie=UTF8&qid=1337379279&sr=8-1
Manning, C. D.; Raghavan, P. & Schütze, H. (Hrsg.)
(2008):
Introduction to Information Retrieval.
Erscheinungsjahr/Year: 2008.
Verlag/Publisher: Cambridge University Press,
New York.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
"Class-tested and coherent, this textbook teaches classical and web information retrieval, including web search and the related areas of text classification and text clustering from basic concepts. It gives an up-to-date treatment of all aspects of the design and implementation of systems for gathering, indexing, and searching documents; methods for evaluating systems; and an introduction to the use of machine learning methods on text collections. All the important ideas are explained using examples and figures, making it perfect for introductory courses in information retrieval for advanced undergraduates and graduate students in computer science. Based on feedback from extensive classroom experience, the book has been carefully structured in order to make teaching more natural and effective. Slides and additional exercises (with solutions for lecturers) are also available through the book's supporting website to help course instructors prepare their lectures." - Publisher's description.
@book{manning2008introduction,
  author        = {Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich},
  title         = {Introduction to Information Retrieval},
  publisher     = {Cambridge University Press},
  address       = {New York},
  year          = {2008},
  url           = {http://www.amazon.com/Introduction-Information-Retrieval-Christopher-Manning/dp/0521865719/ref=sr_1_1?ie=UTF8&qid=1337379279&sr=8-1},
  isbn          = {9780521865715},
  keywords      = {information, ir, retrieval},
  abstract      = {"Class-tested and coherent, this textbook teaches classical and web information retrieval, including web search and the related areas of text classification and text clustering from basic concepts. It gives an up-to-date treatment of all aspects of the design and implementation of systems for gathering, indexing, and searching documents; methods for evaluating systems; and an introduction to the use of machine learning methods on text collections. All the important ideas are explained using examples and figures, making it perfect for introductory courses in information retrieval for advanced undergraduates and graduate students in computer science. Based on feedback from extensive classroom experience, the book has been carefully structured in order to make teaching more natural and effective. Slides and additional exercises (with solutions for lecturers) are also available through the book's supporting website to help course instructors prepare their lectures." -- Publisher's description.},
  internal-note = {NOTE(review): this entry repeats the citation key manning2008introduction used earlier in the file; classic BibTeX warns and ignores the second copy, Biber errors out. Remove one of the two copies.},
}
%0 = book
%A = Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich
%C = New York
%D = 2008
%I = Cambridge University Press
%T = Introduction to Information Retrieval
%U = http://www.amazon.com/Introduction-Information-Retrieval-Christopher-Manning/dp/0521865719/ref=sr_1_1?ie=UTF8&qid=1337379279&sr=8-1
Cha, S.-H.
(2007):
Comprehensive Survey on Distance/Similarity Measures between Probability Density Functions.
In: International Journal of Mathematical Models and Methods in Applied Sciences,
Ausgabe/Number: 4,
Vol. 1,
Erscheinungsjahr/Year: 2007.
Seiten/Pages: 300-307.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Distance or similarity measures are essential to solve many pattern recognition problems such as classification, clustering, and retrieval problems. Various distance/similarity measures that are applicable to compare two probability density functions, pdf in short, are reviewed and categorized in both syntactic and semantic relationships. A correlation coefficient and a hierarchical clustering technique are adopted to reveal similarities among numerous distance/similarity measures.
@article{cha2007comprehensive,
  author   = {Cha, Sung-Hyuk},
  title    = {Comprehensive Survey on Distance/Similarity Measures between Probability Density Functions},
  journal  = {International Journal of Mathematical Models and Methods in Applied Sciences},
  year     = {2007},
  volume   = {1},
  number   = {4},
  pages    = {300--307},
  url      = {http://www.gly.fsu.edu/~parker/geostats/Cha.pdf},
  keywords = {distance, information, ir, measure, retrieval, similarity},
  abstract = {Distance or similarity measures are essential to solve many pattern recognition problems such as classification, clustering, and retrieval problems. Various distance/similarity measures that are applicable to compare two probability density functions, pdf in short, are reviewed and categorized in both syntactic and semantic relationships. A correlation coefficient and a hierarchical clustering technique are adopted to reveal similarities among numerous distance/similarity measures.},
}
%0 = article
%A = Cha, Sung-Hyuk
%D = 2007
%T = Comprehensive Survey on Distance/Similarity Measures between Probability Density Functions
%U = http://www.gly.fsu.edu/~parker/geostats/Cha.pdf
Hotho, A.; Jäschke, R.; Schmitz, C. & Stumme, G.
(2006):
Information Retrieval in Folksonomies: Search and Ranking.
In: The Semantic Web: Research and Applications,
Heidelberg.
[BibTeX][Endnote]
@inproceedings{hotho2006information,
  author    = {Hotho, Andreas and Jäschke, Robert and Schmitz, Christoph and Stumme, Gerd},
  title     = {Information Retrieval in Folksonomies: Search and Ranking},
  editor    = {Sure, York and Domingue, John},
  booktitle = {The Semantic Web: Research and Applications},
  series    = {LNAI},
  publisher = {Springer},
  address   = {Heidelberg},
  year      = {2006},
  volume    = {4011},
  pages     = {411--426},
  keywords  = {2006, FCA, IR, OntologyHandbook, folkrank, folksonomy, information, informationretrieval, itegpub, mimose, myown, pagerank, ranking, retrieval},
}
%0 = inproceedings
%A = Hotho, Andreas and Jäschke, Robert and Schmitz, Christoph and Stumme, Gerd
%B = The Semantic Web: Research and Applications
%C = Heidelberg
%D = 2006
%I = Springer
%T = Information Retrieval in Folksonomies: Search and Ranking
Hotho, A.; Jäschke, R.; Schmitz, C. & Stumme, G.
(2006):
Information Retrieval in Folksonomies: Search and Ranking.
In: The Semantic Web: Research and Applications,
Heidelberg.
[BibTeX][Endnote]
@inproceedings{hotho2006information,
  author        = {Hotho, Andreas and Jäschke, Robert and Schmitz, Christoph and Stumme, Gerd},
  title         = {Information Retrieval in Folksonomies: Search and Ranking},
  editor        = {Sure, York and Domingue, John},
  booktitle     = {The Semantic Web: Research and Applications},
  series        = {LNAI},
  publisher     = {Springer},
  address       = {Heidelberg},
  year          = {2006},
  volume        = {4011},
  pages         = {411--426},
  keywords      = {2006, FCA, IR, OntologyHandbook, folkrank, folksonomy, information, informationretrieval, itegpub, mimose, myown, pagerank, ranking, retrieval},
  internal-note = {NOTE(review): this entry repeats the citation key hotho2006information used earlier in the file (that copy additionally has a mojibake author name). Keep one copy only.},
}
%0 = inproceedings
%A = Hotho, Andreas and Jäschke, Robert and Schmitz, Christoph and Stumme, Gerd
%B = The Semantic Web: Research and Applications
%C = Heidelberg
%D = 2006
%I = Springer
%T = Information Retrieval in Folksonomies: Search and Ranking
Koester, B. (Hrsg.)
(2006):
FooCA: web information retrieval with formal concept analysis.
Erscheinungsjahr/Year: 2006.
Verlag/Publisher: Verlag Allgemeine Wissenschaft,
Mühltal.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
This book deals with Formal Concept Analysis (FCA) and its application to Web Information Retrieval. It explains how Web search results retrieved by major Web search engines such as Google or Yahoo can be conceptualized leading to a human-oriented form of representation. A generalization of Web search results is conducted, leading to an FCA-based introduction of FooCA. FooCA is an application in the field of Conceptual Knowledge Processing and supports the idea of a holistic representation of Web Information Retrieval.
@book{koester2006fooca,
  author    = {Koester, Bjoern},
  title     = {FooCA: web information retrieval with formal concept analysis},
  series    = {Beiträge zur begrifflichen Wissensverarbeitung},
  publisher = {Verlag Allgemeine Wissenschaft},
  address   = {Mühltal},
  year      = {2006},
  url       = {http://www.bjoern-koester.de/fooca/web_information_retrieval_with_formal_concept_analysis.html},
  keywords  = {analysis, concept, fca, fooca, formal, information, ir, retrieval, web},
  abstract  = {This book deals with Formal Concept Analysis (FCA) and its application to Web Information Retrieval. It explains how Web search results retrieved by major Web search engines such as Google or Yahoo can be conceptualized leading to a human-oriented form of representation. A generalization of Web search results is conducted, leading to an FCA-based introduction of FooCA. FooCA is an application in the field of Conceptual Knowledge Processing and supports the idea of a holistic representation of Web Information Retrieval.},
}
%0 = book
%A = Koester, Bjoern
%C = Mühltal
%D = 2006
%I = Verlag Allgemeine Wissenschaft
%T = FooCA: web information retrieval with formal concept analysis
%U = http://www.bjoern-koester.de/fooca/web_information_retrieval_with_formal_concept_analysis.html
Broder, A.
(2002):
A taxonomy of web search.
In: SIGIR Forum,
Ausgabe/Number: 2,
Vol. 36,
Verlag/Publisher: ACM.
Erscheinungsjahr/Year: 2002.
Seiten/Pages: 3-10.
[Volltext] [Kurzfassung] [BibTeX]
[Endnote]
Classic IR (information retrieval) is inherently predicated on users searching for information, the so-called "information need". But the need behind a web search is often not informational - it might be navigational (give me the url of the site I want to reach) or transactional (show me sites where I can perform a certain transaction, e.g. shop, download a file, or find a map). We explore this taxonomy of web searches and discuss how global search engines evolved to deal with web-specific needs.
@article{broder2002taxonomy,
  author    = {Broder, Andrei},
  title     = {A taxonomy of web search},
  journal   = {SIGIR Forum},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2002},
  volume    = {36},
  number    = {2},
  pages     = {3--10},
  url       = {http://doi.acm.org/10.1145/792550.792552},
  doi       = {10.1145/792550.792552},
  issn      = {0163-5840},
  keywords  = {information, irony, retrieval, search, taxonomy, web},
  abstract  = {Classic IR (information retrieval) is inherently predicated on users searching for information, the so-called "information need". But the need behind a web search is often not informational -- it might be navigational (give me the url of the site I want to reach) or transactional (show me sites where I can perform a certain transaction, e.g. shop, download a file, or find a map). We explore this taxonomy of web searches and discuss how global search engines evolved to deal with web-specific needs.},
}
%0 = article
%A = Broder, Andrei
%C = New York, NY, USA
%D = 2002
%I = ACM
%T = A taxonomy of web search
%U = http://doi.acm.org/10.1145/792550.792552
Joachims, T.
(2002):
Optimizing search engines using clickthrough data.
In: Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.
@inproceedings{joachims2002optimizing,
  author    = {Joachims, Thorsten},
  title     = {Optimizing search engines using clickthrough data},
  booktitle = {Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2002},
  pages     = {133--142},
  url       = {http://doi.acm.org/10.1145/775047.775067},
  doi       = {10.1145/775047.775067},
  isbn      = {1-58113-567-X},
  keywords  = {click, data, feedback, information, ir, letor, retrieval, search, user, web},
  abstract  = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.},
}
%0 = inproceedings
%A = Joachims, Thorsten
%B = Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining
%C = New York, NY, USA
%D = 2002
%I = ACM
%T = Optimizing search engines using clickthrough data
%U = http://doi.acm.org/10.1145/775047.775067
Lawrence, S.; Bollacker, K. & Giles, C. L.
(1999):
Indexing and retrieval of scientific literature.
In: Proceedings of the eighth international conference on Information and knowledge management,
New York, NY, USA.
[Volltext]
[Kurzfassung] [BibTeX][Endnote]
<par>The web has greatly improved access to scientific literature. However, scientific articles on the web are largely disorganized, with research articles being spread across archive sites, institution sites, journal sites, and researcher homepages. No index covers all of the available literature, and the major web search engines typically do not index the content of Postscript/PDF documents at all. This paper discusses the creation of digital libraries of scientific literature on the web, including the efficient location of articles, full-text indexing of the articles, autonomous citation indexing, information extraction, display of query-sensitive summaries and citation context, hubs and authorities computation, similar document detection, user profiling, distributed error correction, graph analysis, and detection of overlapping documents. The software for the system is available at no cost for non-commercial use.</par>
@inproceedings{lawrence1999indexing,
  author    = {Lawrence, Steve and Bollacker, Kurt and Giles, C. Lee},
  title     = {Indexing and retrieval of scientific literature},
  booktitle = {Proceedings of the eighth international conference on Information and knowledge management},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1999},
  pages     = {139--146},
  url       = {http://doi.acm.org/10.1145/319950.319970},
  doi       = {10.1145/319950.319970},
  isbn      = {1-58113-146-1},
  keywords  = {crawl, indexing, information, publication, research, retrieval, science},
  abstract  = {The web has greatly improved access to scientific literature. However, scientific articles on the web are largely disorganized, with research articles being spread across archive sites, institution sites, journal sites, and researcher homepages. No index covers all of the available literature, and the major web search engines typically do not index the content of Postscript/PDF documents at all. This paper discusses the creation of digital libraries of scientific literature on the web, including the efficient location of articles, full-text indexing of the articles, autonomous citation indexing, information extraction, display of query-sensitive summaries and citation context, hubs and authorities computation, similar document detection, user profiling, distributed error correction, graph analysis, and detection of overlapping documents. The software for the system is available at no cost for non-commercial use.},
}
%0 = inproceedings
%A = Lawrence, Steve and Bollacker, Kurt and Giles, C. Lee
%B = Proceedings of the eighth international conference on Information and knowledge management
%C = New York, NY, USA
%D = 1999
%I = ACM
%T = Indexing and retrieval of scientific literature
%U = http://doi.acm.org/10.1145/319950.319970
Steenweg, H.
(1992):
Computers and Social History: Building a Database from Mediaeval Tax Registers for improved Information Retrieval in Göttingen.
In: CAA 91, Computer Applications and Quantitative Methods in Archaeology (BAR International Series S 577),.
Hrsg./Editors: Lock, G. & Moffet, J.
London.
Erscheinungsjahr/Year: 1992.
Seiten/Pages: 29-38.
[BibTeX]
[Endnote]
@incollection{steenweg1992computers,
  author    = {Steenweg, Helge},
  title     = {Computers and Social History: Building a Database from Mediaeval Tax Registers for improved Information Retrieval in Göttingen},
  editor    = {Lock, G. and Moffet, J.},
  booktitle = {CAA 91, Computer Applications and Quantitative Methods in Archaeology (BAR International Series S 577)},
  address   = {London},
  year      = {1992},
  pages     = {29--38},
  keywords  = {1992, computer, database, datenbank, history, information-retrieval, mediaeval, mittelalter, myown, retrieval, social-structure, sozialgeschichte, sozialstruktur, tax-register, town},
}
%0 = incollection
%A = Steenweg, Helge
%B = CAA 91, Computer Applications and Quantitative Methods in Archaeology (BAR International Series S 577),
%C = London
%D = 1992
%T = Computers and Social History: Building a Database from Mediaeval Tax Registers for improved Information Retrieval in Göttingen