% Jansen (2006): review and three-stage methodology (collection, preparation,
% analysis) for Web search transaction log analysis.
@article{jansen2006search,
  abstract  = {The use of data stored in transaction logs of Web search engines, Intranets, and Web sites can provide valuable insight into understanding the information-searching process of online searchers. This understanding can enlighten information system design, interface development, and devising the information architecture for content collections. This article presents a review and foundation for conducting Web search transaction log analysis. A methodology is outlined consisting of three stages, which are collection, preparation, and analysis. The three stages of the methodology are presented in detail with discussions of goals, metrics, and processes at each stage. Critical terms in transaction log analysis for Web searching are defined. The strengths and limitations of transaction log analysis as a research method are presented. An application to log client-side interactions that supplements transaction logs is reported on, and the application is made available for use by the research community. Suggestions are provided on ways to leverage the strengths of, while addressing the limitations of, transaction log analysis for Web-searching research. Finally, a complete flat text transaction log from a commercial search engine is available as supplementary material with this manuscript.},
  author    = {Jansen, Bernard J.},
  doi       = {10.1016/j.lisr.2006.06.005},
  interhash = {0488e60c424ea821ee7b3e3760ffd115},
  intrahash = {e147f866b624d461c77a24b79b2d9aff},
  issn      = {0740-8188},
  journal   = {Library \& Information Science Research},
  number    = 3,
  pages     = {407--432},
  title     = {Search log analysis: What it is, what's been done, how to do it},
  url       = {http://www.sciencedirect.com/science/article/pii/S0740818806000673},
  volume    = 28,
  year      = 2006,
}

% Jansen, Booth and Spink (2009): n-gram models for predicting the next
% query-reformulation state in Web searching sessions.
@article{jansen2009patterns,
  abstract  = {Query reformulation is a key user behavior during Web search. Our research goal is to develop predictive models of query reformulation during Web searching. This article reports results from a study in which we automatically classified the query-reformulation patterns for 964,780 Web searching sessions, composed of 1,523,072 queries, to predict the next query reformulation. We employed an n-gram modeling approach to describe the probability of users transitioning from one query-reformulation state to another to predict their next state. We developed first-, second-, third-, and fourth-order models and evaluated each model for accuracy of prediction, coverage of the dataset, and complexity of the possible pattern set. The results show that Reformulation and Assistance account for approximately 45\% of all query reformulations; furthermore, the results demonstrate that the first- and second-order models provide the best predictability, between 28 and 40\% overall and higher than 70\% for some patterns. Implications are that the n-gram approach can be used for improving searching systems and searching assistance.},
  author    = {Jansen, Bernard J. and Booth, Danielle L. and Spink, Amanda},
  doi       = {10.1002/asi.21071},
  interhash = {c72cb0657de6b51a3dc120521c64626d},
  intrahash = {beb7d932ce7da2665184e5b3d933b8fa},
  issn      = {1532-2890},
  journal   = {Journal of the American Society for Information Science and Technology},
  month     = mar,
  number    = 7,
  pages     = {1358--1371},
  publisher = {Wiley Subscription Services, Inc.},
  title     = {Patterns of query reformulation during {Web} searching},
  url       = {http://dx.doi.org/10.1002/asi.21071},
  volume    = 60,
  year      = 2009,
}