% Sintorn and Assarsson (2008): hybrid GPU sorting algorithm, journal article.
% Text outside an entry is ignored by BibTeX, so these lines act as comments.
% - The citation key is unchanged so existing citations keep resolving.
% - pages uses the double-hyphen range form; the percent sign in the abstract
%   is escaped; the complexity expressions in the abstract are written as math.
% - GPU is brace-protected in the title so sentence-casing styles keep it
%   uppercase.
% - interhash/intrahash are not standard fields (presumably identifiers added
%   by the bookmarking service that exported this entry); kept as-is, since
%   unknown fields are ignored by standard styles.
@article{Sintorn20081381,
  author    = {Sintorn, Erik and Assarsson, Ulf},
  title     = {Fast parallel {GPU}-sorting using a hybrid algorithm},
  journal   = {Journal of Parallel and Distributed Computing},
  volume    = {68},
  number    = {10},
  pages     = {1381--1388},
  year      = {2008},
  note      = {General-Purpose Processing using Graphics Processing Units},
  issn      = {0743-7315},
  doi       = {10.1016/j.jpdc.2008.05.012},
  url       = {http://www.sciencedirect.com/science/article/pii/S0743731508001196},
  abstract  = {This paper presents an algorithm for fast sorting of large lists
               using modern GPUs. The method achieves high speed by efficiently
               utilizing the parallelism of the GPU throughout the whole
               algorithm. Initially, GPU-based bucketsort or quicksort splits
               the list into enough sublists then to be sorted in parallel
               using merge-sort. The algorithm is of complexity $n \log n$, and
               for lists of 8 M elements and using a single Geforce 8800
               GTS-512, it is 2.5 times as fast as the bitonic sort algorithms,
               with standard complexity of $n (\log n)^2$, which for a long
               time was considered to be the fastest for GPU sorting. It is 6
               times faster than single CPU quicksort, and 10\% faster than the
               recent GPU-based radix sort. Finally, the algorithm is further
               parallelized to utilize two graphics cards, resulting in yet
               another 1.8 times speedup.},
  interhash = {db7c140766f1848f9a6f74f693d810d8},
  intrahash = {0b72bee3ea941e13dec3f706c0f4362b},
}