% Bibliography entries, one field per line. Conventions used below:
%   - names in "Last, First" form, joined by " and "
%   - page ranges with an en-dash (--), months as the standard three-letter macros
%   - only acronyms are brace-protected in titles, so styles may still recase the rest
%   - interhash/intrahash/acmid/ee/bibsource are exporter fields, ignored by standard styles
% Text outside entries is ignored by BibTeX, so these percent-lines act as comments.

% Stenneth et al., GIS '11. Percent signs in the abstract are escaped so the
% field stays safe if a style prints it.
@inproceedings{Stenneth:2011:TMD:2093973.2093982,
  author    = {Stenneth, Leon and Wolfson, Ouri and Yu, Philip S. and Xu, Bo},
  title     = {Transportation Mode Detection Using Mobile Phones and {GIS} Information},
  booktitle = {Proceedings of the 19th ACM SIGSPATIAL International Conference on Advances in Geographic Information Systems},
  series    = {GIS '11},
  location  = {Chicago, Illinois},
  pages     = {54--63},
  numpages  = {10},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2011},
  isbn      = {978-1-4503-1031-4},
  doi       = {10.1145/2093973.2093982},
  url       = {http://doi.acm.org/10.1145/2093973.2093982},
  acmid     = {2093982},
  abstract  = {The transportation mode such as walking, cycling or on a train denotes an important characteristic of the mobile user's context. In this paper, we propose an approach to inferring a user's mode of transportation based on the GPS sensor on her mobile device and knowledge of the underlying transportation network. The transportation network information considered includes real time bus locations, spatial rail and spatial bus stop information. We identify and derive the relevant features related to transportation network information to improve classification effectiveness. This approach can achieve over 93.5\% accuracy for inferring various transportation modes including: car, bus, aboveground train, walking, bike, and stationary. Our approach improves the accuracy of detection by 17\% in comparison with the GPS only approach, and 9\% in comparison with GPS with GIS models. The proposed approach is the first to distinguish between motorized transportation modes such as bus, car and aboveground train with such high accuracy. Additionally, if a user is travelling by bus, we provide further information about which particular bus the user is riding. Five different inference models including Bayesian Net, Decision Tree, Random Forest, Na{\"i}ve Bayesian and Multilayer Perceptron, are tested in the experiments. The final classification system is deployed and available to the public.},
  interhash = {07950385ca6bb9138db4f20bb3dd7698},
  intrahash = {6eff579bee29983fbf72403faa9b04ae},
}

% Yu, Li, Liu, WWW Alt. '04 (two-page poster paper).
@inproceedings{yu2004temporal,
  author    = {Yu, Philip S. and Li, Xin and Liu, Bing},
  title     = {On the temporal dimension of search},
  booktitle = {Proceedings of the 13th international World Wide Web conference on Alternate track papers \& posters},
  series    = {WWW Alt. '04},
  location  = {New York, NY, USA},
  pages     = {448--449},
  numpages  = {2},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2004},
  isbn      = {1-58113-912-8},
  doi       = {10.1145/1013367.1013519},
  url       = {http://doi.acm.org/10.1145/1013367.1013519},
  acmid     = {1013519},
  abstract  = {Web search is probably the single most important application on the Internet. The most famous search techniques are perhaps the PageRank and HITS algorithms. These algorithms are motivated by the observation that a hyperlink from a page to another is an implicit conveyance of authority to the target page. They exploit this social phenomenon to identify quality pages, e.g., ``authority'' pages and ``hub'' pages. In this paper we argue that these algorithms miss an important dimension of the Web, the temporal dimension. The Web is not a static environment. It changes constantly. Quality pages in the past may not be quality pages now or in the future. These techniques favor older pages because these pages have many in-links accumulated over time. New pages, which may be of high quality, have few or no in-links and are left behind. Bringing new and quality pages to users is important because most users want the latest information. Research publication search has exactly the same problem. This paper studies the temporal dimension of search in the context of research publication search. We propose a number of methods deal with the problem. Our experimental results show that these methods are highly effective.},
  interhash = {106cfcc83b1ea817d7961bdb346af9eb},
  intrahash = {16f2087be646ae8430fd0ff514ec0cf1},
}

% Wang, Wang, Yang, Yu, SIGMOD 2002.
% The second author was exported as "0010, Wei Wang", which parses the number
% as a surname; it is entered here as a normal name.
% The 2009-06-28 value sat in a "date" field beside year 2002; it is kept under
% "timestamp" so biblatex does not take the publication year from it.
% NOTE(review): presumably a record-modification date - confirm.
% NOTE(review): the crossref parent conf/sigmod/2002 is not visible in this part
% of the file - confirm it exists and (for classic BibTeX) appears after this entry.
@inproceedings{conf/sigmod/WangWYY02,
  author    = {Wang, Haixun and Wang, Wei and Yang, Jiong and Yu, Philip S.},
  title     = {Clustering by pattern similarity in large data sets},
  booktitle = {SIGMOD Conference},
  crossref  = {conf/sigmod/2002},
  editor    = {Franklin, Michael J. and Moon, Bongki and Ailamaki, Anastassia},
  pages     = {394--405},
  publisher = {ACM},
  year      = {2002},
  isbn      = {1-58113-497-5},
  doi       = {10.1145/564691.564737},
  ee        = {http://doi.acm.org/10.1145/564691.564737},
  url       = {http://dblp.uni-trier.de/db/conf/sigmod/sigmod2002.html#WangWYY02},
  timestamp = {2009-06-28},
  interhash = {9da0e61a2ac3ac371edfb251fbbfc2ae},
  intrahash = {5ad941d8f0a06bb5e570e22a8cc58d92},
}

% Zhu et al., ICDM'08. Only the acronym is brace-protected in the title; the
% date that trailed the booktitle is carried by month/year instead.
@inproceedings{zhu2008graph,
  author    = {Zhu, Feida and Chen, Chen and Yan, Xifeng and Han, Jiawei and Yu, Philip S.},
  title     = {Graph {OLAP}: Towards Online Analytical Processing on Graphs},
  booktitle = {Proc. 2008 Int. Conf. on Data Mining (ICDM'08), Pisa, Italy},
  month     = dec,
  year      = {2008},
  interhash = {b9f7956dd1e140a386376df25f1a4117},
  intrahash = {066f245bd365c69acfff1378d72dc01e},
}

% Wu et al., Knowledge and Information Systems 14(1). The doi is the bare
% identifier taken from the resolver URL in the url field.
@article{wu2008wu,
  author    = {Wu, Xindong and Kumar, Vipin and Quinlan, J. Ross and Ghosh, Joydeep and Yang, Qiang and Motoda, Hiroshi and McLachlan, Geoffrey and Ng, Angus and Liu, Bing and Yu, Philip S. and Zhou, Zhi-Hua and Steinbach, Michael and Hand, David and Steinberg, Dan},
  title     = {Top 10 algorithms in data mining},
  journal   = {Knowledge and Information Systems},
  volume    = {14},
  number    = {1},
  pages     = {1--37},
  month     = jan,
  year      = {2008},
  publisher = {Springer},
  address   = {London},
  issn      = {0219-1377},
  doi       = {10.1007/s10115-007-0114-2},
  url       = {http://dx.doi.org/10.1007/s10115-007-0114-2},
  abstract  = {This paper presents the top 10 data mining algorithms identified by the IEEE International Conference on Data Mining (ICDM) in December 2006: C4.5, k-Means, SVM, Apriori, EM, PageRank, AdaBoost, kNN, Naive Bayes, and CART. These top 10 algorithms are among the most influential data mining algorithms in the research community. With each algorithm, we provide a description of the algorithm, discuss the impact of the algorithm, and review current and further research on the algorithm. These 10 algorithms cover classification, clustering, statistical learning, association analysis, and link mining, which are all among the most important topics in data mining research and development.},
  interhash = {76fd294a34cf85638f6e194a85af8db9},
  intrahash = {2c34bb4b49187a6d3e780e78d254ae1f},
}

% Aggarwal and Yu, SDM 2005. No pages or publisher were present in the record.
@inproceedings{conf/sdm/AggarwalY05,
  author    = {Aggarwal, Charu C. and Yu, Philip S.},
  title     = {Online Analysis of Community Evolution in Data Streams},
  booktitle = {SDM},
  year      = {2005},
  url       = {http://web.mit.edu/charu/www/aggar142.pdf},
  interhash = {e1487d660a1614b50bd756f7383b98ea},
  intrahash = {bb72c8baa786e98565c4a7448ecae59a},
}

% Aggarwal and Yu, SIGMOD 2000. The ee value is a relative path as exported.
@inproceedings{DBLP:conf/sigmod/AggarwalY00,
  author    = {Aggarwal, Charu C. and Yu, Philip S.},
  title     = {Finding Generalized Projected Clusters In High Dimensional Spaces},
  booktitle = {Proceedings of the 2000 ACM SIGMOD International Conference on Management of Data, May 16-18, 2000, Dallas, Texas, USA},
  editor    = {Chen, Weidong and Naughton, Jeffrey F. and Bernstein, Philip A.},
  pages     = {70--81},
  publisher = {ACM},
  year      = {2000},
  isbn      = {1-58113-218-2},
  ee        = {db/conf/sigmod/AggarwalY00.html},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  interhash = {962d1e1587481e9c47632915d1a56f19},
  intrahash = {0f27054d7195531200761b0160df2978},
}