% Shen et al., SIGIR '04: Web-page classification via summarization.
@inproceedings{Shen:2004:WCT:1008992.1009035,
  abstract = {Web-page classification is much more difficult than pure-text classification due to a large variety of noisy information embedded in Web pages. In this paper, we propose a new Web-page classification algorithm based on Web summarization for improving the accuracy. We first give empirical evidence that ideal Web-page summaries generated by human editors can indeed improve the performance of Web-page classification algorithms. We then propose a new Web summarization-based classification algorithm and evaluate it along with several other state-of-the-art text summarization algorithms on the LookSmart Web directory. Experimental results show that our proposed summarization-based classification algorithm achieves an approximately 8.8\% improvement as compared to pure-text-based classification algorithm. We further introduce an ensemble classifier using the improved summarization algorithm and show that it achieves about 12.9\% improvement over pure-text based methods.},
  acmid = {1009035},
  address = {New York, NY, USA},
  author = {Shen, Dou and Chen, Zheng and Yang, Qiang and Zeng, Hua-Jun and Zhang, Benyu and Lu, Yuchang and Ma, Wei-Ying},
  booktitle = {Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval},
  doi = {10.1145/1008992.1009035},
  interhash = {328ff5b51cb573cd1d253f339892c029},
  intrahash = {b83fca9d43e5afdea78b9791cc07890c},
  isbn = {1-58113-881-4},
  location = {Sheffield, United Kingdom},
  numpages = {8},
  pages = {242--249},
  publisher = {ACM},
  series = {SIGIR '04},
  title = {{Web}-page classification through summarization},
  url = {http://doi.acm.org/10.1145/1008992.1009035},
  year = 2004
}

% Krestel and Chen, ASWC '08: TRP-Rank, measuring tag quality by propagation over a graph.
@inproceedings{krestel2008art,
  abstract = {Collaborative tagging, supported by many social networking websites, is currently enjoying an increasing popularity. The usefulness of this largely available tag data has been explored in many applications including web resources categorization, deriving emergent semantics, web search etc. However, since tags are supplied by users freely, not all of them are useful and reliable, especially when they are generated by spammers with malicious intent. Therefore, identifying tags of high quality is crucial in improving the performance of applications based on tags. In this paper, we propose TRP-Rank (Tag-Resource Pair Rank), an algorithm to measure the quality of tags by manually assessing a seed set and propagating the quality through a graph. The three dimensional relationship among users, tags and web resources is firstly represented by a graph structure. A set of seed nodes, where each node represents a tag annotating a resource, is then selected and their quality is assessed. The quality of the remaining nodes is calculated by propagating the known quality of the seeds through the graph structure. We evaluate our approach on a public data set where tags generated by suspicious spammers were manually labelled. The experimental results demonstrate the effectiveness of this approach in measuring the quality of tags.},
  acmid = {1484165},
  address = {Berlin, Heidelberg},
  author = {Krestel, Ralf and Chen, Ling},
  booktitle = {Proceedings of the 3rd Asian Semantic Web Conference on The Semantic Web},
  doi = {10.1007/978-3-540-89704-0_18},
  interhash = {44401088956f59c92c11f6a910ed4df4},
  intrahash = {dc00da9179d556ce047c1b41eb815e21},
  isbn = {978-3-540-89703-3},
  location = {Bangkok, Thailand},
  numpages = {15},
  pages = {257--271},
  publisher = {Springer-Verlag},
  series = {ASWC '08},
  title = {The Art of Tagging: Measuring the Quality of Tags},
  url = {http://dx.doi.org/10.1007/978-3-540-89704-0_18},
  year = 2008
}

% Guan et al., SIGIR 2009: personalized tag recommendation with graph-based ranking.
% The doi field repeats the DOI embedded in the ee URL.
% NOTE(review): crossref parent conf/sigir/2009 is not defined in this part of the
% file -- confirm it exists (classic BibTeX wants it after this entry).
@inproceedings{conf/sigir/GuanBMCW09,
  author = {Guan, Ziyu and Bu, Jiajun and Mei, Qiaozhu and Chen, Chun and Wang, Can},
  booktitle = {SIGIR},
  crossref = {conf/sigir/2009},
  doi = {10.1145/1571941.1572034},
  editor = {Allan, James and Aslam, Javed A. and Sanderson, Mark and Zhai, ChengXiang and Zobel, Justin},
  ee = {http://doi.acm.org/10.1145/1571941.1572034},
  interhash = {53d2e8bc966048bc01efcc57b2fc8250},
  intrahash = {ac9427acf51cbf7cb5a35f66a16a32c0},
  isbn = {978-1-60558-483-6},
  pages = {540--547},
  publisher = {ACM},
  title = {Personalized tag recommendation using graph-based ranking on multi-type interrelated objects},
  url = {http://www-personal.umich.edu/~qmei/pub/sigir09-tag.pdf},
  year = 2009
}

% Mitchell et al., AAAI 2015: Never-Ending Learning.
% NOTE(review): no pages or DOI recorded; author spellings not verified against the paper.
@inproceedings{mitchell2015,
  author = {Mitchell, T. and Cohen, W. and Hruscha, E. and Talukdar, P. and Betteridge, J. and Carlson, A. and Dalvi, B. and Gardner, M. and Kisiel, B. and Krishnamurthy, J. and Lao, N. and Mazaitis, K. and Mohammad, T. and Nakashole, N. and Platanios, E. and Ritter, A. and Samadi, M. and Settles, B. and Wang, R. and Wijaya, D. and Gupta, A. and Chen, X. and Saparov, A. and Greaves, M. and Welling, J.},
  booktitle = {AAAI},
  interhash = {52d0d71f6f5b332dabc1412f18e3a93d},
  intrahash = {63070703e6bb812852cca56574aed093},
  note = {Never-Ending Learning in AAAI-2015},
  title = {Never-Ending Learning},
  url = {http://www.cs.cmu.edu/~wcohen/pubs.html},
  year = 2015
}

% Chen and Pu, UMUAI 19(3), 2009: design guidelines for critiquing-based recommenders.
% NOTE(review): the abstract ends on a colon and looks truncated at export time.
@article{chen2009interaction,
  abstract = {A critiquing-based recommender system acts like an artificial salesperson. It engages users in a conversational dialog where users can provide feedback in the form of critiques to the sample items that were shown to them. The feedback, in turn, enables the system to refine its understanding of the user's preferences and prediction of what the user truly wants. The system is then able to recommend products that may better stimulate the user's interest in the next interaction cycle. In this paper, we report our extensive investigation of comparing various approaches in devising critiquing opportunities designed in these recommender systems. More specifically, we have investigated two major design elements which are necessary for a critiquing-based recommender system:},
  author = {Chen, Li and Pu, Pearl},
  doi = {10.1007/s11257-008-9057-x},
  interhash = {a9feffd15221c15c499b2ac98ce7d03a},
  intrahash = {f0e063a97473519ca650fe029da73ce7},
  issn = {0924-1868},
  journal = {User Modeling and User-Adapted Interaction},
  language = {English},
  number = 3,
  pages = {167--206},
  publisher = {Springer Netherlands},
  title = {Interaction design guidelines on critiquing-based recommender systems},
  url = {http://dx.doi.org/10.1007/s11257-008-9057-x},
  volume = 19,
  year = 2009
}

% Doan et al., SIGMOD Record 37(4), 2009: information-extraction challenges for unstructured data.
@article{doan2009information,
  abstract = {Over the past few years, we have been trying to build an end-to-end system at Wisconsin to manage unstructured data, using extraction, integration, and user interaction. This paper describes the key information extraction (IE) challenges that we have run into, and sketches our solutions. We discuss in particular developing a declarative IE language, optimizing for this language, generating IE provenance, incorporating user feedback into the IE process, developing a novel wiki-based user interface for feedback, best-effort IE, pushing IE into RDBMSs, and more. Our work suggests that IE in managing unstructured data can open up many interesting research challenges, and that these challenges can greatly benefit from the wealth of work on managing structured data that has been carried out by the database community.},
  acmid = {1519106},
  address = {New York, NY, USA},
  author = {Doan, AnHai and Naughton, Jeffrey F. and Ramakrishnan, Raghu and Baid, Akanksha and Chai, Xiaoyong and Chen, Fei and Chen, Ting and Chu, Eric and DeRose, Pedro and Gao, Byron and Gokhale, Chaitanya and Huang, Jiansheng and Shen, Warren and Vuong, Ba-Quy},
  doi = {10.1145/1519103.1519106},
  interhash = {b80d6ce47b976503692def4e86b0097d},
  intrahash = {fccc9f25a1c70cb71d3377a7ddfe1614},
  issn = {0163-5808},
  issue_date = {December 2008},
  journal = {SIGMOD Record},
  month = mar,
  number = 4,
  numpages = {7},
  pages = {14--20},
  publisher = {ACM},
  title = {Information extraction challenges in managing unstructured data},
  url = {http://doi.acm.org/10.1145/1519103.1519106},
  volume = 37,
  year = 2009
}

% Dumais and Chen, SIGIR '00: hierarchical classification of Web content.
@inproceedings{Dumais:2000:HCW:345508.345593,
  acmid = {345593},
  address = {New York, NY, USA},
  author = {Dumais, Susan and Chen, Hao},
  booktitle = {Proceedings of the 23rd annual international ACM SIGIR conference on Research and development in information retrieval},
  doi = {10.1145/345508.345593},
  interhash = {1051e6db3c79db59699a253138bb3b64},
  intrahash = {be8e5ee591f98d95ff6ee2f2f227e3be},
  isbn = {1-58113-226-3},
  location = {Athens, Greece},
  numpages = {8},
  pages = {256--263},
  publisher = {ACM},
  series = {SIGIR '00},
  title = {Hierarchical classification of {Web} content},
  url = {http://doi.acm.org/10.1145/345508.345593},
  year = 2000
}

% Lu, Chen and Park, CIKM '09 (poster): tripartite clustering over social tagging data.
@inproceedings{Lu:2009:ETN:1645953.1646167,
  abstract = {In this poster, we investigate how to enhance web clustering by leveraging the tripartite network of social tagging systems. We propose a clustering method, called "Tripartite Clustering", which cluster the three types of nodes (resources, users and tags) simultaneously based on the links in the social tagging network. The proposed method is experimented on a real-world social tagging dataset sampled from del.icio.us. We also compare the proposed clustering approach with K-means. All the clustering results are evaluated against a human-maintained web directory. The experimental results show that Tripartite Clustering significantly outperforms the content-based K-means approach and achieves performance close to that of social annotation-based K-means whereas generating much more useful information.},
  acmid = {1646167},
  address = {New York, NY, USA},
  author = {Lu, Caimei and Chen, Xin and Park, E. K.},
  booktitle = {Proceeding of the 18th ACM conference on Information and knowledge management},
  doi = {10.1145/1645953.1646167},
  interhash = {e192e53972f28d78f1ecbffbfea08bed},
  intrahash = {86160cf68758ec60922323a34a7833f0},
  isbn = {978-1-60558-512-3},
  location = {Hong Kong, China},
  numpages = {4},
  pages = {1545--1548},
  publisher = {ACM},
  series = {CIKM '09},
  title = {Exploit the tripartite network of social tagging for web clustering},
  url = {http://doi.acm.org/10.1145/1645953.1646167},
  year = 2009
}

% Hu et al., SIGIR 2008: text clustering enhanced with Wikipedia semantics.
% The doi field repeats the DOI embedded in the ee URL.
% NOTE(review): crossref parent conf/sigir/2008 is not defined in this part of the
% file -- confirm it exists (classic BibTeX wants it after this entry).
@inproceedings{hu2008enhancing,
  author = {Hu, Jian and Fang, Lujun and Cao, Yang and Zeng, Hua-Jun and Li, Hua and Yang, Qiang and Chen, Zheng},
  booktitle = {SIGIR},
  crossref = {conf/sigir/2008},
  doi = {10.1145/1390334.1390367},
  editor = {Myaeng, Sung-Hyon and Oard, Douglas W. and Sebastiani, Fabrizio and Chua, Tat-Seng and Leong, Mun-Kew},
  ee = {http://doi.acm.org/10.1145/1390334.1390367},
  interhash = {0a2878165034dcdfacb9045608ec482a},
  intrahash = {76f863a12c0b983ec67682deaec1ada4},
  isbn = {978-1-60558-164-4},
  pages = {179--186},
  publisher = {ACM},
  title = {Enhancing text clustering by leveraging {Wikipedia} semantics},
  url = {http://dblp.uni-trier.de/db/conf/sigir/sigir2008.html#HuFCZLYC08},
  year = 2008
}

% Chen and Redner, Journal of Informetrics 4(3), 2010: communities in the Physical Review citation network.
@article{chen2010community,
  abstract = {We investigate the community structure of physics subfields in the citation network of all Physical Review publications between 1893 and August 2007. We focus on well-cited publications (those receiving more than 100 citations), and apply modularity maximization to uncover major communities that correspond to clearly identifiable subfields of physics. While most of the links between communities connect those with obvious intellectual overlap, there sometimes exist unexpected connections between disparate fields due to the development of a widely applicable theoretical technique or by cross fertilization between theory and experiment. We also examine communities decade by decade and also uncover a small number of significant links between communities that are widely separated in time.},
  author = {Chen, P. and Redner, S.},
  doi = {10.1016/j.joi.2010.01.001},
  interhash = {412bd111704a139dacf9fcb9255ab8a9},
  intrahash = {9c56e6737f83d7e3160613ec439c1591},
  issn = {1751-1577},
  journal = {Journal of Informetrics},
  number = 3,
  pages = {278--290},
  title = {Community structure of the {Physical Review} citation network},
  url = {http://www.sciencedirect.com/science/article/pii/S1751157710000027},
  volume = 4,
  year = 2010
}

% Chen and Lee, Public Management Review 20(5), 2018: collaborative data networks.
% NOTE(review): no pages or DOI recorded for this article -- fill in from the publisher.
@article{chen2018collaborative,
  abstract = {This study aims to advance the theory and practice of managing collaborative data networks for information and decision-support services that exist in over 400 US metropolitan areas. Integrating insights from collaborative governance, network management, and cross-boundary information sharing, this study develops a framework to outline the interplay between context, management, collaborative dynamics, technology, and performance. This study further utilizes the framework to conduct an exploratory in-depth case study of a metropolitan transportation data network to examine such interplay. The findings suggest ways to improve the performance of collaborative data networks and their implications are discussed.},
  author = {Chen, Yu-Che and Lee, Jooho},
  interhash = {6655d25a640f0332e64bc275ec81b76f},
  intrahash = {676925e589b15d9a9803e792996c0eb0},
  journal = {Public Management Review},
  month = may,
  number = 5,
  title = {Collaborative data networks for public service: governance, management, and performance},
  uniqueid = {128465135|buh},
  volume = 20,
  year = 2018
}

% Wang et al., FSKD 2010: Claper, recommending classical papers to beginners.
@inproceedings{wang2010claper,
  abstract = {Classical papers are of great help for beginners to get familiar with a new research area. However, digging them out is a difficult problem. This paper proposes Claper, a novel academic recommendation system based on two proven principles: the Principle of Download Persistence and the Principle of Citation Approaching (we prove them based on real-world datasets). The principle of download persistence indicates that classical papers have few decreasing download frequencies since they were published. The principle of citation approaching indicates that a paper which cites a classical paper is likely to cite citations of that classical paper. Our experimental results based on large-scale real-world datasets illustrate Claper can effectively recommend classical papers of high quality to beginners and thus help them enter their research areas.},
  author = {Wang, Yonggang and Zhai, Ennan and Hu, Jianbin and Chen, Zhong},
  booktitle = {Proceedings of the seventh International Conference on Fuzzy Systems and Knowledge Discovery},
  doi = {10.1109/FSKD.2010.5569227},
  interhash = {7180ddaf1c1765a45fd244027bd0bf43},
  intrahash = {7da72bf2f0538afad9377a0d50c263b4},
  month = aug,
  pages = {2777--2781},
  publisher = {IEEE},
  title = {{Claper}: Recommend classical papers to beginners},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5569227},
  volume = 6,
  year = 2010
}

% Langer, Langer and Chen, 2000: new Botryobasidium species collected in Taiwan.
% NOTE(review): journal expanded from the abbreviation "Mycol. Res" -- confirm.
@article{langer2000botryobasidium,
  author = {Langer, G. and Langer, E. and Chen, C.-J.},
  interhash = {5c19a706b8523f1bc5903cc0895b981f},
  intrahash = {936930038423feb67a987db26909e4c9},
  journal = {Mycological Research},
  number = 4,
  pages = {510--512},
  title = {{Botryobasidium musaisporum} sp. nov. collected in {Taiwan}},
  volume = {104},
  year = 2000
}

% Behm et al., Distributed and Parallel Databases 29(3), 2011: ASTERIX platform overview.
@article{behm2011asterix,
  abstract = {ASTERIX is a new data-intensive storage and computing platform project spanning UC Irvine, UC Riverside, and UC San Diego. In this paper we provide an overview of the ASTERIX project, starting with its main goal---the storage and analysis of data pertaining to evolving-world models. We describe the requirements and associated challenges, and explain how the project is addressing them. We provide a technical overview of ASTERIX, covering its architecture, its user model for data and queries, and its approach to scalable query processing and data management. ASTERIX utilizes a new scalable runtime computational platform called Hyracks that is also discussed at an overview level; we have recently made Hyracks available in open source for use by other interested parties. We also relate our work on ASTERIX to the current state of the art and describe the research challenges that we are currently tackling as well as those that lie ahead.},
  address = {Netherlands},
  affiliation = {University of California, Irvine, USA},
  author = {Behm, Alexander and Borkar, Vinayak and Carey, Michael and Grover, Raman and Li, Chen and Onose, Nicola and Vernica, Rares and Deutsch, Alin and Papakonstantinou, Yannis and Tsotras, Vassilis},
  doi = {10.1007/s10619-011-7082-y},
  interhash = {3e06363406f716c5d9340dc2c693adb3},
  intrahash = {42d96cc4877943527a9259424c584740},
  issn = {0926-8782},
  journal = {Distributed and Parallel Databases},
  keyword = {Computer Science},
  number = 3,
  pages = {185--216},
  publisher = {Springer},
  title = {{ASTERIX}: towards a scalable, semistructured data platform for evolving-world models},
  url = {http://dx.doi.org/10.1007/s10619-011-7082-y},
  volume = 29,
  year = 2011
}

% Alsubaiee et al., PVLDB 5(12), 2012: ASTERIX demo paper.
@article{alsubaiee2012asterix,
  abstract = {At UC Irvine, we are building a next generation parallel database system, called ASTERIX, as our approach to addressing today's "Big Data" management challenges. ASTERIX aims to combine time-tested principles from parallel database systems with those of the Web-scale computing community, such as fault tolerance for long running jobs. In this demo, we present a whirlwind tour of ASTERIX, highlighting a few of its key features. We will demonstrate examples of our data definition language to model semi-structured data, and examples of interesting queries using our declarative query language. In particular, we will show the capabilities of ASTERIX for answering geo-spatial queries and fuzzy queries, as well as ASTERIX' data feed construct for continuously ingesting data.},
  acmid = {2367532},
  author = {Alsubaiee, Sattam and Altowim, Yasser and Altwaijry, Hotham and Behm, Alexander and Borkar, Vinayak and Bu, Yingyi and Carey, Michael and Grover, Raman and Heilbron, Zachary and Kim, Young-Seok and Li, Chen and Onose, Nicola and Pirzadeh, Pouria and Vernica, Rares and Wen, Jian},
  interhash = {ae521b66302adb1b7df3f4cdb8d92181},
  intrahash = {003f2654ae41861cfb77bf0353634ac3},
  issn = {2150-8097},
  issue_date = {August 2012},
  journal = {Proceedings of the VLDB Endowment},
  month = aug,
  number = 12,
  numpages = {4},
  pages = {1898--1901},
  publisher = {VLDB Endowment},
  title = {{ASTERIX}: an open source system for ``{Big Data}'' management and analysis (demo)},
  url = {http://dl.acm.org/citation.cfm?id=2367502.2367532},
  volume = 5,
  year = 2012
}

% Dong et al., CSIE 2009 (vol. 3): overview of learning to rank for IR.
% The doi field repeats the DOI embedded in the ee URL.
% NOTE(review): crossref parent conf/csie/2009 is not defined in this part of the
% file -- confirm it exists (classic BibTeX wants it after this entry).
@inproceedings{dong2009overview,
  author = {Dong, Xishuang and Chen, Xiaodong and Guan, Yi and Yu, Zhiming and Li, Sheng},
  booktitle = {CSIE (3)},
  crossref = {conf/csie/2009},
  doi = {10.1109/CSIE.2009.1090},
  editor = {Burgin, Mark and Chowdhury, Masud H. and Ham, Chan H. and Ludwig, Simone A. and Su, Weilian and Yenduri, Sumanth},
  ee = {http://doi.ieeecomputersociety.org/10.1109/CSIE.2009.1090},
  interhash = {038285e30e929088afad8d82c066ef75},
  intrahash = {d970cfabe05f5e19100099afa11b9873},
  isbn = {978-0-7695-3507-4},
  pages = {600--606},
  publisher = {IEEE Computer Society},
  title = {An Overview of Learning to Rank for Information Retrieval},
  url = {http://dblp.uni-trier.de/db/conf/csie/csie2009-3.html#DongCGYL09},
  year = 2009
}

% Liu et al., WWW '05 (posters): large-scale web categorization study.
% Author surnames are stored in normal case; the style decides any small caps or upper-casing.
@inproceedings{liu2005experimental,
  abstract = {Taxonomies of the Web typically have hundreds of thousands of categories and skewed category distribution over documents. It is not clear whether existing text classification technologies can perform well on and scale up to such large-scale applications. To understand this, we conducted the evaluation of several representative methods (Support Vector Machines, k-Nearest Neighbor and Naive Bayes) with Yahoo! taxonomies. In particular, we evaluated the effectiveness/efficiency tradeoff in classifiers with hierarchical setting compared to conventional (flat) setting, and tested popular threshold tuning strategies for their scalability and accuracy in large-scale classification problems.},
  acmid = {1062891},
  address = {New York, NY, USA},
  author = {Liu, Tie-Yan and Yang, Yiming and Wan, Hao and Zhou, Qian and Gao, Bin and Zeng, Hua-Jun and Chen, Zheng and Ma, Wei-Ying},
  booktitle = {Special interest tracks and posters of the 14th international conference on World Wide Web},
  doi = {10.1145/1062745.1062891},
  interhash = {e581e4dd2ed6d748031a812c724c4b7c},
  intrahash = {36cc9f92b9c722b2aff441b23e44b2f7},
  isbn = {1-59593-051-5},
  location = {Chiba, Japan},
  numpages = {2},
  pages = {1106--1107},
  publisher = {ACM},
  series = {WWW '05},
  title = {An experimental study on large-scale web categorization},
  url = {http://doi.acm.org/10.1145/1062745.1062891},
  year = 2005
}

% Chen et al., Concurrency and Computation 19(15), 2007: user reputation model for interactive QA.
@article{chen2007reputation,
  abstract = {In this paper, we propose a user reputation model and apply it to a user-interactive question answering system. It combines the social network analysis approach and the user rating approach. Social network analysis is applied to analyze the impact of participant users' relations to their reputations. User rating is used to acquire direct judgment of a user's reputation based on other users' experiences with this user. Preliminary experiments show that the computed reputations based on our proposed reputation model can reflect the actual reputations of the simulated roles and therefore can fit in well with our user-interactive question answering system. Copyright {\copyright} 2006 John Wiley \& Sons, Ltd.},
  author = {Chen, Wei and Zeng, Qingtian and Wenyin, Liu and Hao, Tianyong},
  doi = {10.1002/cpe.1142},
  interhash = {c304f655ee6ee183e07192b9fed0d618},
  intrahash = {858df3646b706ce6308a12cbf1585d58},
  issn = {1532-0634},
  journal = {Concurrency and Computation: Practice and Experience},
  number = 15,
  pages = {2091--2103},
  publisher = {John Wiley \& Sons, Ltd.},
  title = {A user reputation model for a user-interactive question answering system},
  url = {http://dx.doi.org/10.1002/cpe.1142},
  volume = 19,
  year = 2007
}

% Yuen, Chen and King, CSE '09: survey of human computation systems.
@inproceedings{yuen2009survey,
  abstract = {Human computation is a technique that makes use of human abilities for computation to solve problems. The human computation problems are the problems those computers are not good at solving but are trivial for humans. In this paper, we give a survey of various human computation systems which are categorized into initiatory human computation, distributed human computation and social game-based human computation with volunteers, paid engineers and online players. For the existing large number of social games, some previous works defined various types of social games, but the recent developed social games cannot be categorized based on the previous works. In this paper, we define the categories and the characteristics of social games which are suitable for all existing ones. Besides, we present a survey on the performance aspects of human computation system. This paper gives a better understanding on human computation system.},
  author = {Yuen, Man-Ching and Chen, Ling-Jyh and King, I.},
  booktitle = {Proceedings of the International Conference on Computational Science and Engineering, CSE '09},
  doi = {10.1109/CSE.2009.395},
  interhash = {69f9bd3e6a721f226e39e1f990e20286},
  intrahash = {8670a20dbf6aa9dd21da81ab78a1e333},
  month = aug,
  pages = {723--728},
  title = {A Survey of Human Computation Systems},
  url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5283450&tag=1},
  volume = 4,
  year = 2009
}

% Yang et al., ROMANSY 18 (CISM vol. 524), 2010: comparing human postures from motion capture.
% NOTE(review): "noKey" is a placeholder citation key; it is kept so existing
% citations keep resolving -- consider giving the entry a descriptive key.
@incollection{noKey,
  author = {Yang, Wei-Ting and Luo, Zhiqiang and Chen, I-Ming and Yeo, SongHuat},
  booktitle = {ROMANSY 18 Robot Design, Dynamics and Control},
  doi = {10.1007/978-3-7091-0277-0_52},
  editor = {Parenti Castelli, Vincenzo and Schiehlen, Werner},
  interhash = {d394d3bffeeef41689674a40710ef770},
  intrahash = {87c47f272197da68f301b3529faaeb99},
  isbn = {978-3-7091-0276-3},
  language = {English},
  pages = {441-{}-448},
  publisher = {Springer Vienna},
  series = {CISM International Centre for Mechanical Sciences},
  title = {A Method for Comparing Human Postures from Motion Capture Data},
  url = {http://dx.doi.org/10.1007/978-3-7091-0277-0_52},
  volume = 524,
  year = 2010
}