@misc{yu2013largescale, abstract = {The multi-label classification problem has generated significant interest in recent years. However, existing approaches do not adequately address two key challenges: (a) the ability to tackle problems with a large number (say millions) of labels, and (b) the ability to handle data with missing labels. In this paper, we directly address both these problems by studying the multi-label problem in a generic empirical risk minimization (ERM) framework. Our framework, despite being simple, is surprisingly able to encompass several recent label-compression based methods which can be derived as special cases of our method. To optimize the ERM problem, we develop techniques that exploit the structure of specific loss functions - such as the squared loss function - to offer efficient algorithms. We further show that our learning framework admits formal excess risk bounds even in the presence of missing labels. Our risk bounds are tight and demonstrate better generalization performance for low-rank promoting trace-norm regularization when compared to (rank insensitive) Frobenius norm regularization. Finally, we present extensive empirical results on a variety of benchmark datasets and show that our methods perform significantly better than existing label compression based methods and can scale up to very large datasets such as the Wikipedia dataset.}, author = {Yu, Hsiang-Fu and Jain, Prateek and Kar, Purushottam and Dhillon, Inderjit S.}, interhash = {1252173520757338468a68e028494647}, intrahash = {716e5270c1dcb3a1e4eedf9934859021}, note = {cite arxiv:1307.5101}, title = {Large-scale Multi-label Learning with Missing Labels}, url = {http://arxiv.org/abs/1307.5101}, year = 2013 }

@proceedings{thierrypoibeau2013multisource, abstract = {Information extraction (IE) and text summarization (TS) are powerful technologies for finding relevant pieces of information in text and presenting them to the user in condensed form. The ongoing information explosion makes IE and TS critical for successful functioning within the information society. These technologies face particular challenges due to the inherent multi-source nature of the information explosion. The technologies must now handle not isolated texts or individual narratives, but rather large-scale repositories and streams -- in general, in multiple languages -- containing a multiplicity of perspectives, opinions, or commentaries on particular topics, entities or events. There is thus a need to adapt existing techniques and develop new ones to deal with these challenges. This volume contains a selection of papers that present a variety of methodologies for content identification and extraction, as well as for content fusion and regeneration. The chapters cover various aspects of the challenges, depending on the nature of the information sought -- names vs. events -- and the nature of the sources -- news streams vs. image captions vs. scientific research papers, etc. This volume aims to offer a broad and representative sample of studies from this very active research field.}, address = {Berlin; New York}, editor = {Poibeau, Thierry and Saggion, Horacio and Piskorski, Jakub and Yangarber, Roman}, interhash = {b1d51398d5660ed1e16f40d74cc815db}, intrahash = {21816f2809a2b58397acce5ac9558d28}, isbn = {978-3-642-28569-1; 978-3-642-28568-4}, publisher = {Springer}, refid = {808368416}, title = {Multi-source, multilingual information extraction and summarization}, url = {http://link.springer.com/book/10.1007/978-3-642-28569-1}, year = 2013 }

@article{LinLT12, author = {Lin, Chun-Yuan and Lee, Wei Sheng and Tang, Chuan Yi}, ee = {http://dx.doi.org/10.4018/jghpc.2012040101}, interhash = {f997386bf7a538a5325828cbdb3c5074}, intrahash = {819d2f02cc699fd0f3ccfb172e60dce9}, journal = {IJGHPC}, number = 2, pages = {1-16}, title = {Parallel Shellsort Algorithm for Many-Core GPUs with CUDA}, url = {http://dblp.uni-trier.de/db/journals/ijghpc/ijghpc4.html#LinLT12}, volume = 4, year = 2012 }

@misc{titov2008modeling, abstract = {In this paper we present a novel framework for extracting the ratable aspects of objects from online user reviews. Extracting such aspects is an important challenge in automatically mining product opinions from the web and in generating opinion-based summaries of user reviews. Our models are based on extensions to standard topic modeling methods such as LDA and PLSA to induce multi-grain topics. We argue that multi-grain models are more appropriate for our task since standard models tend to produce topics that correspond to global properties of objects (e.g., the brand of a product type) rather than the aspects of an object that tend to be rated by a user. The models we present not only extract ratable aspects, but also cluster them into coherent topics, e.g., `waitress' and `bartender' are part of the same topic `staff' for restaurants. This differentiates it from much of the previous work which extracts aspects through term frequency analysis with minimal clustering. We evaluate the multi-grain models both qualitatively and quantitatively to show that they improve significantly upon standard topic models.}, author = {Titov, Ivan and McDonald, Ryan}, interhash = {00cbf1df09c3f2c65d5a31a0537aed3f}, intrahash = {f3286f5efa0115f465563d0259c32255}, note = {cite arxiv:0801.1063}, title = {Modeling Online Reviews with Multi-grain Topic Models}, url = {http://arxiv.org/abs/0801.1063}, year = 2008 }

@incollection{wrobel1997algorithm, address = {Berlin / Heidelberg}, affiliation = {GMD FIT.KI, Schloß Birlinghoven, 53754 Sankt Augustin, Germany}, author = {Wrobel, Stefan}, booktitle = {Principles of Data Mining and Knowledge Discovery}, doi = {10.1007/3-540-63223-9_108}, editor = {Komorowski, Jan and Zytkow, Jan}, interhash = {9ac8bc696e0d28389e65f03632abb10d}, intrahash = {e482ea217486f1d84e4d4688b78f774f}, isbn = {978-3-540-63223-8}, keyword = {Computer Science}, pages = {78-87}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {An algorithm for multi-relational discovery of subgroups}, url = {http://dx.doi.org/10.1007/3-540-63223-9_108}, volume = 1263, year = 1997 }

@inproceedings{lee00algorithms, author = {Lee, Daniel D. and Seung, H. Sebastian}, booktitle = {{NIPS}}, interhash = {cf8707cab8812be3c21d3e5c10fad477}, intrahash = {a54d0f1fa298d6e6a7135fa56b80fb5e}, pages = {556-562}, title = {Algorithms for Non-negative Matrix Factorization}, url = {http://citeseer.ist.psu.edu/lee01algorithms.html}, year = 2000 }
@article{lee99, author = {Lee, Daniel D. and Seung, H. Sebastian}, interhash = {b293699f3a0541bd6fb1b10d7f9410ac}, intrahash = {ba1100135fbba9782a6052b95e6f9b05}, journal = {Nature}, key = {Lee and Seung}, pages = {788--791}, title = {Learning the parts of objects by nonnegative matrix factorization}, volume = 401, year = 1999 }

@inproceedings{conf/pkdd/ReadPHF09, author = {Read, Jesse and Pfahringer, Bernhard and Holmes, Geoffrey and Frank, Eibe}, booktitle = {ECML/PKDD (2)}, crossref = {conf/pkdd/2009-2}, date = {2009-08-31}, editor = {Buntine, Wray L. and Grobelnik, Marko and Mladenic, Dunja and Shawe-Taylor, John}, ee = {http://dx.doi.org/10.1007/978-3-642-04174-7_17}, interhash = {d07ad188ba08d6931d30643b849de079}, intrahash = {ab264cc42b2f1530ab6da09aaf5fa0fc}, isbn = {978-3-642-04173-0}, pages = {254-269}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Classifier Chains for Multi-label Classification}, url = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2009-2.html#ReadPHF09}, volume = 5782, year = 2009 }

@inproceedings{conf/pkdd/MaesPDG09, abstract = {Collective classification refers to the classification of interlinked and relational objects described as nodes in a graph. The Iterative Classification Algorithm (ICA) is a simple, efficient and widely used method to solve this problem. It is representative of a family of methods for which inference proceeds as an iterative process: at each step, nodes of the graph are classified according to the current predicted labels of their neighbors. We show that learning in this class of models suffers from a training bias. We propose a new family of methods, called Simulated ICA, which helps reducing this training bias by simulating inference during learning. Several variants of the method are introduced. They are both simple, efficient and scale well. Experiments performed on a series of 7 datasets show that the proposed methods outperform representative state-of-the-art algorithms while keeping a low complexity.}, author = {Maes, Francis and Peters, Stéphane and Denoyer, Ludovic and Gallinari, Patrick}, booktitle = {ECML/PKDD (2)}, crossref = {conf/pkdd/2009-2}, date = {2009-08-31}, editor = {Buntine, Wray L. and Grobelnik, Marko and Mladenic, Dunja and Shawe-Taylor, John}, ee = {http://dx.doi.org/10.1007/978-3-642-04174-7_4}, interhash = {91c999fb8704c3e4301df8c967a1c711}, intrahash = {6308dba1d66e8118b891c0e75273b0a7}, isbn = {978-3-642-04173-0}, pages = {47-62}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {Simulated Iterative Classification: A New Learning Procedure for Graph Labeling}, url = {http://dblp.uni-trier.de/db/conf/pkdd/pkdd2009-2.html#MaesPDG09}, volume = 5782, year = 2009 }

@article{weiwei2009combining, abstract = {Multilabel classification is an extension of conventional classification in which a single instance can be associated with multiple labels. Recent research has shown that, just like for conventional classification, instance-based learning algorithms relying on the nearest neighbor estimation principle can be used quite successfully in this context. However, since hitherto existing algorithms do not take correlations and interdependencies between labels into account, their potential has not yet been fully exploited. In this paper, we propose a new approach to multilabel classification, which is based on a framework that unifies instance-based learning and logistic regression, comprising both methods as special cases. This approach allows one to capture interdependencies between labels and, moreover, to combine model-based and similarity-based inference for multilabel classification. As will be shown by experimental studies, our approach is able to improve predictive accuracy in terms of several evaluation criteria for multilabel prediction.}, author = {Cheng, Weiwei and Hüllermeier, Eyke}, interhash = {1f49c2672a44144b7073d3d7e9f82346}, intrahash = {40bd0d294de6c597255ae86dff700230}, journal = {Machine Learning and Knowledge Discovery in Databases}, pages = {6--6}, title = {Combining Instance-Based Learning and Logistic Regression for Multilabel Classification}, url = {http://dx.doi.org/10.1007/978-3-642-04180-8_6}, year = 2009 }

@article{christian2009withinnetwork, abstract = {Within-network classification, where the goal is to classify the nodes of a partly labeled network, is a semi-supervised learning problem that has applications in several important domains like image processing, the classification of documents, and the detection of malicious activities. While most methods for this problem infer the missing labels collectively based on the hypothesis that linked or nearby nodes are likely to have the same labels, there are many types of networks for which this assumption fails, e.g., molecular graphs, trading networks, etc. In this paper, we present a collective classification method, based on relaxation labeling, that classifies entities of a network using their local structure. This method uses a marginalized similarity kernel that compares the local structure of two nodes with random walks in the network. Through experimentation on different datasets, we show our method to be more accurate than several state-of-the-art approaches for this problem.}, author = {Desrosiers, Christian and Karypis, George}, interhash = {5db04cc3cfea4d9777a55c7c9a44f71c}, intrahash = {fbcbbf5c016ec86fe15591e70f71b66b}, journal = {Machine Learning and Knowledge Discovery in Databases}, pages = {260--275}, title = {Within-Network Classification Using Local Structure Similarity}, url = {http://dx.doi.org/10.1007/978-3-642-04180-8_34}, year = 2009 }

@inproceedings{bekkerman2005multiway, abstract = {We present a novel unsupervised learning scheme that simultaneously clusters variables of several types (e.g., documents, words and authors) based on pairwise interactions between the types, as observed in co-occurrence data. In this scheme, multiple clustering systems are generated aiming at maximizing an objective function that measures multiple pairwise mutual information between cluster variables. To implement this idea, we propose an algorithm that interleaves top-down clustering of some variables and bottom-up clustering of the other variables, with a local optimization correction routine. Focusing on document clustering we present an extensive empirical study of two-way, three-way and four-way applications of our scheme using six real-world datasets including the 20 News-groups (20NG) and the Enron email collection. Our multi-way distributional clustering (MDC) algorithms consistently and significantly outperform previous state-of-the-art information theoretic clustering algorithms.}, address = {New York, NY, USA}, author = {Bekkerman, Ron and El-Yaniv, Ran and McCallum, Andrew}, booktitle = {ICML '05: Proceedings of the 22nd International Conference on Machine Learning}, doi = {10.1145/1102351.1102357}, interhash = {25609f84a6916c1664e61d8618f46a32}, intrahash = {2921f89f8663e7bcc122a2a77c66e7c2}, isbn = {1-59593-180-5}, location = {Bonn, Germany}, pages = {41--48}, publisher = {ACM}, title = {Multi-way distributional clustering via pairwise interactions}, url = {http://portal.acm.org/citation.cfm?id=1102351.1102357}, year = 2005 }
@inproceedings{conf/grc/ZhangZ05b, author = {Zhang, Min-Ling and Zhou, Zhi-Hua}, booktitle = {GrC}, crossref = {conf/grc/2005}, date = {2007-03-22}, editor = {Hu, Xiaohua and Liu, Qing and Skowron, Andrzej and Lin, Tsau Young and Yager, Ronald R. and Zhang, Bo}, ee = {http://doi.ieeecomputersociety.org/10.1109/GRC.2005.1547385}, interhash = {3c58a80457442249a999a2ceea877565}, intrahash = {3460603745790c4824a9bdb572e64777}, isbn = {0-7803-9017-2}, pages = {718-721}, publisher = {IEEE}, title = {A k-nearest neighbor based algorithm for multi-label classification}, url = {http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/grc05.pdf}, year = 2005 }

@inproceedings{Bickel&Scheffer04, author = {Bickel, S. and Scheffer, T.}, booktitle = {Proceedings of the IEEE International Conference on Data Mining}, interhash = {3e2001aa90bdeac09a91ee0e47552c07}, intrahash = {d7288e68d25ddb2cf8936d14a67f0c62}, title = {Multi-View Clustering}, year = 2004 }

@article{keyhere, abstract = {An established technique to face a multiclass categorization problem is to reduce it into a set of two-class problems. To this aim, the main decomposition schemes employed are one vs. one, one vs. all and Error Correcting Output Coding. A point not yet considered in the research is how to apply these methods to a cost-sensitive classification that represents a significant aspect in many real problems. In this paper we propose a novel method which, starting from the cost matrix for the multi-class problem and from the code matrix employed, extracts a cost matrix for each of the binary subproblems induced by the coding matrix. In this way, it is possible to tune the single two-class classifier according to the cost matrix obtained and achieve an output from all the dichotomizers which takes into account the requirements of the original multi-class cost matrix. To evaluate the effectiveness of the method, a large number of tests has been performed on real data sets. The experiments results have shown a significant improvement in terms of classification cost, specially when using the ECOC scheme.}, author = {Marrocco, Claudio and Tortorella, Francesco}, interhash = {11a4ba8234ccd19f9362591e0a1963f4}, intrahash = {a234beda6a9a042041c89b21c8291eb0}, journal = {Structural, Syntactic, and Statistical Pattern Recognition}, pages = {753--761}, title = {A Cost-Sensitive Paradigm for Multiclass to Binary Decomposition Schemes}, url = {http://www.springerlink.com/content/5fdg88yxqvwale7j}, year = 2004 }