@misc{yu2013largescale,
  abstract      = {The multi-label classification problem has generated significant interest in recent years. However, existing approaches do not adequately address two key challenges: (a) the ability to tackle problems with a large number (say millions) of labels, and (b) the ability to handle data with missing labels. In this paper, we directly address both these problems by studying the multi-label problem in a generic empirical risk minimization (ERM) framework. Our framework, despite being simple, is surprisingly able to encompass several recent label-compression based methods which can be derived as special cases of our method. To optimize the ERM problem, we develop techniques that exploit the structure of specific loss functions - such as the squared loss function - to offer efficient algorithms. We further show that our learning framework admits formal excess risk bounds even in the presence of missing labels. Our risk bounds are tight and demonstrate better generalization performance for low-rank promoting trace-norm regularization when compared to (rank insensitive) Frobenius norm regularization. Finally, we present extensive empirical results on a variety of benchmark datasets and show that our methods perform significantly better than existing label compression based methods and can scale up to very large datasets such as the Wikipedia dataset.},
  author        = {Yu, Hsiang-Fu and Jain, Prateek and Kar, Purushottam and Dhillon, Inderjit S.},
  title         = {Large-scale Multi-label Learning with Missing Labels},
  eprint        = {1307.5101},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1307.5101},
  year          = {2013},
}

@proceedings{thierrypoibeau2013multisource,
  abstract  = {Information extraction (IE) and text summarization (TS) are powerful technologies for finding relevant pieces of information in text and presenting them to the user in condensed form. The ongoing information explosion makes IE and TS critical for successful functioning within the information society. These technologies face particular challenges due to the inherent multi-source nature of the information explosion. The technologies must now handle not isolated texts or individual narratives, but rather large-scale repositories and streams--in general, in multiple languages--containing a multiplicity of perspectives, opinions, or commentaries on particular topics, entities or events. There is thus a need to adapt existing techniques and develop new ones to deal with these challenges. This volume contains a selection of papers that present a variety of methodologies for content identification and extraction, as well as for content fusion and regeneration. The chapters cover various aspects of the challenges, depending on the nature of the information sought--names vs. events,-- and the nature of the sources--news streams vs. image captions vs. scientific research papers, etc. This volume aims to offer a broad and representative sample of studies from this very active research field.},
  editor    = {Poibeau, Thierry and Saggion, Horacio and Piskorski, Jakub and Yangarber, Roman},
  title     = {Multi-source, multilingual information extraction and summarization},
  publisher = {Springer},
  address   = {Berlin},
  isbn      = {978-3-642-28569-1},
  url       = {http://link.springer.com/book/10.1007/978-3-642-28569-1},
  year      = {2013},
}

@misc{titov2008modeling,
  abstract      = {In this paper we present a novel framework for extracting the ratable aspects of objects from online user reviews. Extracting such aspects is an important challenge in automatically mining product opinions from the web and in generating opinion-based summaries of user reviews. Our models are based on extensions to standard topic modeling methods such as LDA and PLSA to induce multi-grain topics. We argue that multi-grain models are more appropriate for our task since standard models tend to produce topics that correspond to global properties of objects (e.g., the brand of a product type) rather than the aspects of an object that tend to be rated by a user. The models we present not only extract ratable aspects, but also cluster them into coherent topics, e.g., `waitress' and `bartender' are part of the same topic `staff' for restaurants. This differentiates it from much of the previous work which extracts aspects through term frequency analysis with minimal clustering. We evaluate the multi-grain models both qualitatively and quantitatively to show that they improve significantly upon standard topic models.},
  author        = {Titov, Ivan and McDonald, Ryan},
  title         = {Modeling Online Reviews with Multi-grain Topic Models},
  eprint        = {0801.1063},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0801.1063},
  year          = {2008},
}

@inproceedings{conf/grc/ZhangZ05b,
  author    = {Zhang, Min-Ling and Zhou, Zhi-Hua},
  title     = {A k-nearest neighbor based algorithm for multi-label classification},
  booktitle = {GrC},
  editor    = {Hu, Xiaohua and Liu, Qing and Skowron, Andrzej and Lin, Tsau Young and Yager, Ronald R. and Zhang, Bo},
  publisher = {IEEE},
  pages     = {718--721},
  isbn      = {0-7803-9017-2},
  doi       = {10.1109/GRC.2005.1547385},
  crossref  = {conf/grc/2005},
  url       = {http://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/grc05.pdf},
  year      = {2005},
}

@inproceedings{Bickel&Scheffer04,
  author    = {Bickel, S. and Scheffer, T.},
  title     = {Multi-View Clustering},
  booktitle = {Proceedings of the IEEE International Conference on Data Mining},
  year      = {2004},
}

@inproceedings{1102357,
  author    = {Bekkerman, Ron and El-Yaniv, Ran and McCallum, Andrew},
  title     = {Multi-way distributional clustering via pairwise interactions},
  booktitle = {ICML '05: Proceedings of the 22nd international conference on Machine learning},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  location  = {Bonn, Germany},
  pages     = {41--48},
  isbn      = {1-59593-180-5},
  doi       = {10.1145/1102351.1102357},
  url       = {http://www.cs.technion.ac.il/~rani/el-yaniv-papers/BekkermanEM05.pdf},
  year      = {2005},
}

@article{keyhere,
  abstract = {An established technique to face a multiclass categorization problem is to reduce it into a set of two-class problems. To this aim, the main decomposition schemes employed are one vs. one, one vs. all and Error Correcting Output Coding. A point not yet considered in the research is how to apply these methods to a cost-sensitive classification that represents a significant aspect in many real problems. In this paper we propose a novel method which, starting from the cost matrix for the multi-class problem and from the code matrix employed, extracts a cost matrix for each of the binary subproblems induced by the coding matrix. In this way, it is possible to tune the single two-class classifier according to the cost matrix obtained and achieve an output from all the dichotomizers which takes into account the requirements of the original multi-class cost matrix. To evaluate the effectiveness of the method, a large number of tests has been performed on real data sets. The experiments results have shown a significant improvement in terms of classification cost, specially when using the ECOC scheme.},
  author   = {Marrocco, Claudio and Tortorella, Francesco},
  title    = {A Cost-Sensitive Paradigm for Multiclass to Binary Decomposition Schemes},
  journal  = {Structural, Syntactic, and Statistical Pattern Recognition},
  pages    = {753--761},
  url      = {http://www.springerlink.com/content/5fdg88yxqvwale7j},
  year     = {2004},
}