@inproceedings{parameswaran2011answering, abstract = {For some problems, human assistance is needed in addition to automated (algorithmic) computation. In sharp contrast to existing data management approaches, where human input is either ad-hoc or is never used, we describe the design of the first declarative language involving human-computable functions, standard relational operators, as well as algorithmic computation. We consider the challenges involved in optimizing queries posed in this language, in particular, the tradeoffs between uncertainty, cost and performance, as well as combination of human and algorithmic evidence. We believe that the vision laid out in this paper can act as a road-map for a new area of data management research where human computation is routinely used in data analytics.}, author = {Parameswaran, Aditya and Polyzotis, Neoklis}, booktitle = {Conference on Inovative Data Systems Research (CIDR 2011)}, interhash = {037601fdcba1c499a3e89b1427235489}, intrahash = {8c11ab0f21767c79cd694a795eddf169}, month = jan, pages = {160--166}, title = {Answering Queries using Humans, Algorithms and Databases}, url = {http://ilpubs.stanford.edu:8090/986/}, year = 2011 } @inproceedings{little2010turkit, abstract = {Mechanical Turk (MTurk) provides an on-demand source of human computation. This provides a tremendous opportunity to explore algorithms which incorporate human computation as a function call. However, various systems challenges make this difficult in practice, and most uses of MTurk post large numbers of independent tasks. TurKit is a toolkit for prototyping and exploring algorithmic human computation, while maintaining a straight-forward imperative programming style. We present the crash-and-rerun programming model that makes TurKit possible, along with a variety of applications for human computation algorithms. We also present case studies of TurKit used for real experiments across different fields.}, acmid = {1866040}, address = {New York, NY, USA}, author = {Little, Greg and Chilton, Lydia B. and Goldman, Max and Miller, Robert C.}, booktitle = {Proceedings of the 23nd annual ACM symposium on User interface software and technology}, doi = {10.1145/1866029.1866040}, interhash = {a2b44d507345037242e3590eee0ab671}, intrahash = {e364db8e8ee1992a0fdb5f37f425b1a7}, isbn = {978-1-4503-0271-5}, location = {New York, New York, USA}, numpages = {10}, pages = {57--66}, publisher = {ACM}, title = {TurKit: human computation algorithms on mechanical turk}, url = {http://doi.acm.org/10.1145/1866029.1866040}, year = 2010 } @book{knuth2011combinatorial, asin = {0201038048}, author = {Knuth, Donald E.}, dewey = {001.642}, ean = {9780201038040}, interhash = {276f5451e5ece797ff2c247aaa2866a8}, intrahash = {481baec64f095b2555e780614e5c8f13}, isbn = {0201038048}, month = jan, publisher = {Addison-Wesley Professional}, series = {The Art of Computer Programming}, title = {Combinatorial Algorithms}, url = {http://www.amazon.com/Art-Computer-Programming-Combinatorial-Algorithms/dp/0201038048/ref=sr_1_1/179-7432054-1264222?ie=UTF8&s=books&qid=1298901523&sr=8-1}, volume = {4A}, year = 2011 } @article{boyer1977string, abstract = {An algorithm is presented that searches for the location, “il” of the first occurrence of a character string, “pat,” in another string, “string.” During the search operation, the characters of pat are matched starting with the last character of pat. The information gained by starting the match at the end of the pattern often allows the algorithm to proceed in large jumps through the text being searched. Thus the algorithm has the unusual property that, in most cases, not all of the first i characters of string are inspected. The number of characters actually inspected (on the average) decreases as a function of the length of pat. For a random English pattern of length 5, the algorithm will typically inspect i/4 characters of string before finding a match at i. Furthermore, the algorithm has been implemented so that (on the average) fewer than i + patlen machine instructions are executed. These conclusions are supported with empirical evidence and a theoretical analysis of the average behavior of the algorithm. The worst case behavior of the algorithm is linear in i + patlen, assuming the availability of array space for tables linear in patlen plus the size of the alphabet. 3~}, address = {New York, NY, USA}, author = {Boyer, Robert S. and Moore, J. Strother}, doi = {10.1145/359842.359859}, interhash = {4df7c63c9e5aa054a20b177265d6fe79}, intrahash = {bf45f38d1fd1a07a8690339f5a829818}, issn = {0001-0782}, journal = {Communications of the ACM}, month = oct, number = 10, pages = {762--772}, publisher = {ACM}, title = {A fast string searching algorithm}, url = {http://portal.acm.org/citation.cfm?id=359859}, volume = 20, year = 1977 } @article{battista1994algorithms, abstract = {Several data presentation problems involve drawing graphs so that they are easy to read and understand. Examples include circuit schematics and software engineering diagrams. In this paper we present a bibliographic survey on algorithms whose goal is to produce aesthetically pleasing drawings of graphs. Research on this topic is spread over the broad spectrum of Computer Science. This bibliography constitutes an attempt to encompass both theoretical and application oriented papers from disparate areas.}, address = {Amsterdam, The Netherlands}, author = {Battista, Giuseppe Di and Eades, Peter and Tamassia, Roberto and Tollis, Ioannis G.}, doi = {10.1016/0925-7721(94)00014-X}, interhash = {c8010ae4e2e0aedf21607631ac79bed4}, intrahash = {a62ded9eee008659ffa33d294b8de870}, issn = {0925-7721}, journal = {Computational Geometry: Theory and Applications}, number = 5, pages = {235--282}, publisher = {Elsevier Science Publishers B. V.}, title = {Algorithms for drawing graphs: an annotated bibliography}, url = {http://portal.acm.org/citation.cfm?id=195598}, volume = 4, year = 1994 } @book{knuth1998art2, address = {Boston, MA, USA}, author = {Knuth, Donald E.}, edition = {2nd}, interhash = {f8866d240e847372a483fdc7bd741523}, intrahash = {bbcd4d56e619911172e571d36bb28f3b}, isbn = {0-201-89685-0}, publisher = {Addison-Wesley Longman Publishing Co.}, title = {The art of computer programming}, url = {http://portal.acm.org/citation.cfm?id=270146}, volume = 3, year = 1998 } @inproceedings{orlando02efficient, abstract = {Due to the huge increase in the number and dimension of available databases, efficient solutions for counting frequent sets are nowadays very important within the Data Mining community. Several sequential and parallel algorithms were proposed, whichin many cases exhibit excellent scalability. In this paper we present ParDCI, a distributed and multithreaded algorithm forcounting the occurrences of frequent sets within transactional databases. ParDCI is a parallel version of DCI (Direct Count& Intersect), a multi-strategy algorithm which is able to adapt its behavior not only to the features of the specific computingplatform (e.g. available memory), but also to the features of the dataset being processed (e.g. sparse or dense datasets).ParDCI enhances previous proposals by exploiting the highly optimized counting and intersection techniques of DCI, and byrelying on a multi-level parallelization approachwh ichex plicitly targets clusters of SMPs, an emerging computing platform.We focused our work on the efficient exploitation of the underlying architecture. Intra-Node multithreading effectively exploitsthe memory hierarchies of each SMP node, while Inter-Node parallelism exploits smart partitioning techniques aimed at reducingcommunication overheads. In depth experimental evaluations demonstrate that ParDCI reaches nearly optimal performances undera variety of conditions.}, author = {Orlando, Salvatore and Palmerini, Paolo and Perego, Raffaele and Silvestri, Fabrizio}, booktitle = {High Performance Computing for Computational Science — VECPAR 2002}, interhash = {50c17d100341c01892f7dd8fbd7deb69}, intrahash = {522c68b8bb5e28f1bf9f1e11e612f542}, pages = {3--29}, title = {An Efficient Parallel and Distributed Algorithm for Counting Frequent Sets}, url = {http://dx.doi.org/10.1007/3-540-36569-9_28}, year = 2003 } @article{611918, abstract = {Text books, including books for general audiences, invariably mention bubble sort in discussions of elementary sorting algorithms. We trace the history of bubble sort, its popularity, and its endurance in the face of pedagogical assertions that code and algorithmic examples used in early courses should be of high quality and adhere to established best practices. This paper is more an historical analysis than a philosophical treatise for the exclusion of bubble sort from books and courses. However, sentiments for exclusion are supported by Knuth [17], "In short, the bubble sort seems to have nothing to recommend it, except a catchy name and the fact that it leads to some interesting theoretical problems." Although bubble sort may not be a best practice sort, perhaps the weight of history is more than enough to compensate and provide for its longevity.}, address = {New York, NY, USA}, author = {Astrachan, Owen}, doi = {http://doi.acm.org/10.1145/792548.611918}, interhash = {a7b4dd529c860c4c616849b8438862f8}, intrahash = {68e8985978a6c6abd57b7bbef5740d59}, issn = {0097-8418}, journal = {SIGCSE Bull.}, number = 1, pages = {1--5}, publisher = {ACM}, title = {Bubble sort: an archaeological algorithmic analysis}, url = {http://portal.acm.org/citation.cfm?id=611918&dl=GUIDE,}, volume = 35, year = 2003 } @article{wu2008wu, abstract = {This paper presents the top 10 data mining algorithms identified by the IEEE International Conference on Data Mining (ICDM) in December 2006: C4.5, k-Means, SVM, Apriori, EM, PageRank, AdaBoost, kNN, Naive Bayes, and CART. These top 10 algorithms are among the most influential data mining algorithms in the research community.With each algorithm, we provide a description of the algorithm, discuss the impact of the algorithm, and review current andfurther research on the algorithm. These 10 algorithms cover classification, clustering, statistical learning, associationanalysis, and link mining, which are all among the most important topics in data mining research and development.}, address = {London}, author = {Wu, Xindong and Kumar, Vipin and Quinlan, J. Ross and Ghosh, Joydeep and Yang, Qiang and Motoda, Hiroshi and McLachlan, Geoffrey and Ng, Angus and Liu, Bing and Yu, Philip and Zhou, Zhi-Hua and Steinbach, Michael and Hand, David and Steinberg, Dan}, interhash = {76fd294a34cf85638f6e194a85af8db9}, intrahash = {2c34bb4b49187a6d3e780e78d254ae1f}, issn = {0219-1377}, journal = {Knowledge and Information Systems}, month = Jan, number = 1, pages = {1--37}, publisher = {Springer}, title = {Top 10 algorithms in data mining}, url = {http://dx.doi.org/10.1007/s10115-007-0114-2}, volume = 14, year = 2008 } @inproceedings{troy2007faster, abstract = {We introduce a simple but efficient, multistage algorithm for constructing concept lattices (MCA). A concept lattice can be obtained as the closure system generated from attribute concepts (dually, object concepts). There are two strategies to use this as the basis of an algorithm: (a) forming intersections by joining one attribute concept at a time, and (b) repeatedly forming pairwise intersections starting from the attribute concepts. A straightforward translation of (b) to an algorithm suggests that pairwise intersection be performed among all cumulated concepts. MCA is parsimonious in forming the pairwise intersections: it only performs such operations among the newly formed concepts from the previous stage, instead of cumulatively. We show that this parsimonious multistage strategy is complete: it generates all concepts. To make this strategy really work, one must overcome the need to eliminate duplicates (and potentially save time even further), since concepts generated at a later stage may have already appeared in one of the earlier stages. As considered in several other algorithms in the literature [5], we achieve this by an auxiliary search tree which keeps all existing concepts as paths from the root to a flagged node or a leaf. The depth of the search tree is bounded by the total number of attributes, and hence the time complexity for concept lookup is bounded by the logarithm of the total number of concepts. For constructing lattice diagrams, we adapt a sub-quadratic algorithm of Pritchard [9] for computing subset partial orders to constructing the Hasse diagrams. Instead of the standard expected quadratic complexity, the Pritchard approach achieves a worst-case time O(N2/log N). Our experimental results showed significant improvements in speed for a variety of input profiles against three leading algorithms considered in the comprehensive comparative study [5]: Bordat, Chein, and Norris.}, address = {Berlin, Heidelberg}, author = {Troy, Adam D. and Zhang, Guo-Qiang and Tian, Ye}, booktitle = {Proceedings of the 15th International Conference on Conceptual Structures (ICCS 2007)}, editor = {Priss, U. and Polovina, S. and Hill, R.}, interhash = {f2dbcacf668b206e7020a9a5a3621daf}, intrahash = {94f0a8c1cb1719df010e74056193e6b4}, pages = {206--219}, publisher = {Springer-Verlag}, series = {Lecture Notes in Artificial Intelligence}, title = {Faster Concept Analysis}, url = {http://newton.cwru.edu/papers/MCA.pdf}, volume = 4604, year = 2007 } @inproceedings{jaeschke2006trias, address = {Hong Kong}, author = {Jäschke, Robert and Hotho, Andreas and Schmitz, Christoph and Ganter, Bernhard and Stumme, Gerd}, booktitle = {Proceedings of the 6th IEEE International Conference on Data Mining (ICDM 06)}, doi = {http://doi.ieeecomputersociety.org/10.1109/ICDM.2006.162}, interhash = {b4964c3bdd2991a80873d7080ef6a73e}, intrahash = {e387c294129e11f4221514d5fa807e26}, isbn = {0-7695-2701-9}, issn = {1550-4786}, month = {December}, pages = {907-911}, publisher = {IEEE Computer Society}, title = {TRIAS - An Algorithm for Mining Iceberg Tri-Lattices}, url = {http://www.kde.cs.uni-kassel.de/pub/pdf/jaeschke2006trias.pdf}, vgwort = {19}, year = 2006 } @inproceedings{686448, address = {London, UK}, author = {Robardet, Celine and Feschet, Fabien}, booktitle = {IDEAL '00: Proceedings of the Second International Conference on Intelligent Data Engineering and Automated Learning, Data Mining, Financial Engineering, and Intelligent Agents}, interhash = {4f84f6ea55fa08abf881620b0e0a5c08}, intrahash = {fc7666de531b96ae956d933ea88be3cd}, isbn = {3-540-41450-9}, pages = {565--570}, publisher = {Springer-Verlag}, title = {A New Methodology to Compare Clustering Algorithms}, year = 2000 } @inproceedings{conf/cpm/Baeza-Yates04, abstract = {This paper introduces a simple intersection algorithm for two sorted sequences that is fast on average. It is related to the multiple searching problem and to merging. We present the worst and average case analysis, showing that in the former, the complexity nicely adapts to the smallest list size. In the later case, it performs less comparisons than the total number of elements on both inputs when n = agr m (agr > 1). Finally, we show its application to fast query processing in Web search engines, where large intersections, or differences, must be performed fast.}, author = {Baeza-Yates, Ricardo A.}, booktitle = {Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching, CPM 2004}, editor = {Sahinalp, Suleyman Cenk and Muthukrishnan, S. and Dogrusoz, Ugur}, interhash = {a71b1a4a88d4228da47271d744a1a97b}, intrahash = {ac1a8233f1ea5edb39d834f943390cfc}, pages = {400-408}, title = {A Fast Set Intersection Algorithm for Sorted Sequences.}, url = {http://www.springerlink.com/index/YTH9H90Y94N10L7E.pdf}, year = 2004 } @article{newman03fast, author = {Newman, M.E.J.}, interhash = {4493f03106eb8dd9db41c0ef3f667bb3}, intrahash = {56de7e6d214faebdbf2f2ef0fce09d7d}, journal = {Physical Review E}, month = {September}, title = {Fast algorithm for detecting community structure in networks}, url = {http://arxiv.org/abs/cond-mat/0309508}, volume = 69, year = 2003 } @incollection{ganter87algorithmen, author = {Ganter, B.}, booktitle = {Beiträge zur Begriffsanalyse }, editor = {Ganter, B. and Wille, R. and Wolff, K. E.}, interhash = {9774f54b5e5334ed781618385ded2e75}, intrahash = {298def2cb3fc83ae1ff185763548df53}, pages = {241-254}, publisher = {B.I. Wissenschaftsverlag }, title = {{Algorithmen zur Formalen Begriffsanalyse}}, year = 1987 }