Baeza-Yates, R. A.
(2004):
A Fast Set Intersection Algorithm for Sorted Sequences.
In: Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching, CPM 2004,
[Volltext]
[Kurzfassung] [BibTeX][Endnote]

This paper introduces a simple intersection algorithm for two sorted sequences that is fast on average. It is related to the multiple searching problem and to merging. We present the worst and average case analysis, showing that in the former, the complexity nicely adapts to the smallest list size. In the latter case, it performs fewer comparisons than the total number of elements on both inputs when n = αm (α > 1). Finally, we show its application to fast query processing in Web search engines, where large intersections, or differences, must be performed fast.

% Conference paper. The title carries no trailing period (the bibliography style adds punctuation), the page range uses "--", and the acronym CPM is brace-protected against recasing.
@inproceedings{conf/cpm/Baeza-Yates04,
  author    = {Baeza-Yates, Ricardo A.},
  title     = {A Fast Set Intersection Algorithm for Sorted Sequences},
  editor    = {Sahinalp, Suleyman Cenk and Muthukrishnan, S. and Dogrusoz, Ugur},
  booktitle = {Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching, {CPM} 2004},
  year      = {2004},
  pages     = {400--408},
  url       = {http://www.springerlink.com/index/YTH9H90Y94N10L7E.pdf},
  keywords  = {algorithm, intersection, fast, set, merge},
  abstract  = {This paper introduces a simple intersection algorithm for two sorted sequences that is fast on average. It is related to the multiple searching problem and to merging. We present the worst and average case analysis, showing that in the former, the complexity nicely adapts to the smallest list size. In the latter case, it performs fewer comparisons than the total number of elements on both inputs when $n = \alpha m$ ($\alpha > 1$). Finally, we show its application to fast query processing in Web search engines, where large intersections, or differences, must be performed fast.},
}

%0 = inproceedings
%A = Baeza-Yates, Ricardo A.
%B = Proceedings of the 15th Annual Symposium on Combinatorial Pattern Matching, CPM 2004
%D = 2004
%T = A Fast Set Intersection Algorithm for Sorted Sequences.
%U = http://www.springerlink.com/index/YTH9H90Y94N10L7E.pdf