@inproceedings{he2022design,
  abstract = {Continual learning can enable neural networks to evolve by learning new tasks sequentially in task-changing scenarios. However, two general challenges must be overcome before this technique can be applied to real-world applications. First, novelties newly collected from the data stream may contain anomalies that are meaningless for continual learning. Instead of treating them as a new task for updating, we have to filter out such anomalies to reduce the disturbance that extremely high-entropy data causes to convergence. Second, little effort has been put into research on the explainability of continual learning, which leads to a lack of transparency and credibility of the updated neural networks. Elaborate explanations of the process and results of continual learning can help experts judge and make decisions. Therefore, we propose the conceptual design of an explainability module with experts in the loop based on techniques such as dimension reduction, visualization, and evaluation strategies. This work aims to overcome the mentioned challenges by sufficiently explaining and visualizing the identified anomalies and the updated neural network. With the help of this module, experts can be more confident in decision-making regarding anomaly filtering, dynamic adjustment of hyperparameters, data backup, etc.},
  author = {He, Yujiang and Huang, Zhixin and Sick, Bernhard},
  booktitle = {Interactive Machine Learning Workshop (IMLW), AAAI},
  pages = {1--6},
  title = {Design of Explainability Module with Experts in the Loop for Visualization and Dynamic Adjustment of Continual Learning},
  url = {https://arxiv.org/abs/2202.06781},
  year = 2022
}

@article{adomavicius2012impact,
  abstract = {This article investigates the impact of rating data characteristics on the performance of several popular recommendation algorithms, including user-based and item-based collaborative filtering, as well as matrix factorization. We focus on three groups of data characteristics: rating space, rating frequency distribution, and rating value distribution. A sampling procedure was employed to obtain rating data subsamples with varying characteristics; recommendation algorithms were used to estimate the predictive accuracy for each sample; and linear regression-based models were used to uncover the relationships between data characteristics and recommendation accuracy. Experimental results on multiple rating datasets show the consistent and significant effects of several data characteristics on recommendation accuracy.},
  acmid = {2151166},
  address = {New York, NY, USA},
  articleno = {3},
  author = {Adomavicius, Gediminas and Zhang, Jingjing},
  doi = {10.1145/2151163.2151166},
  issn = {2158-656X},
  issue_date = {April 2012},
  journal = {ACM Trans. Manage. Inf. Syst.},
  month = apr,
  number = 1,
  numpages = {17},
  pages = {3:1--3:17},
  publisher = {ACM},
  title = {Impact of Data Characteristics on Recommender Systems Performance},
  url = {http://doi.acm.org/10.1145/2151163.2151166},
  volume = 3,
  year = 2012
}
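A minimal sketch of the regression methodology summarized in the adomavicius2012impact abstract above, using synthetic data: the particular characteristics chosen here (density, user/item ratio, rating-frequency Gini) and the use of scikit-learn's LinearRegression are illustrative assumptions, not the authors' exact experimental setup.

# Sketch: relating rating-data characteristics to recommendation accuracy.
# Feature names and data are illustrative assumptions, not the paper's setup.
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)

# One row per data subsample:
# [density, user/item ratio, rating-frequency Gini] (assumed characteristics)
X = rng.random((50, 3))
# Observed RMSE of some recommender on each subsample (synthetic here).
rmse = 1.2 - 0.5 * X[:, 0] + 0.1 * rng.standard_normal(50)

model = LinearRegression().fit(X, rmse)
print("coefficients:", model.coef_)   # estimated effect of each characteristic
print("intercept:", model.intercept_)

The fitted coefficients then indicate how strongly each data characteristic is associated with accuracy across subsamples, which is the kind of relationship the article reports.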
@inproceedings{cremonesi2010performance,
  abstract = {In many commercial systems, the 'best bet' recommendations are shown, but the predicted rating values are not. This is usually referred to as a top-N recommendation task, where the goal of the recommender system is to find a few specific items that are supposed to be most appealing to the user. Common methodologies based on error metrics (such as RMSE) are not a natural fit for evaluating the top-N recommendation task. Rather, top-N performance can be directly measured by alternative methodologies based on accuracy metrics (such as precision/recall). An extensive evaluation of several state-of-the-art recommender algorithms suggests that algorithms optimized for minimizing RMSE do not necessarily perform as expected on the top-N recommendation task. Results show that improvements in RMSE often do not translate into accuracy improvements. In particular, a naive non-personalized algorithm can outperform some common recommendation approaches and almost match the accuracy of sophisticated algorithms. Another finding is that a very small number of popular items can skew top-N performance. The analysis points out that when evaluating a recommender algorithm on the top-N recommendation task, the test set should be chosen carefully in order not to bias accuracy metrics towards non-personalized solutions. Finally, we offer practitioners new variants of two collaborative filtering algorithms that, regardless of their RMSE, significantly outperform other recommender algorithms on the top-N recommendation task while offering additional practical advantages. This comes as a surprise given the simplicity of these two methods.},
  acmid = {1864721},
  address = {New York, NY, USA},
  author = {Cremonesi, Paolo and Koren, Yehuda and Turrin, Roberto},
  booktitle = {Proceedings of the Fourth ACM Conference on Recommender Systems},
  doi = {10.1145/1864708.1864721},
  isbn = {978-1-60558-906-0},
  location = {Barcelona, Spain},
  numpages = {8},
  pages = {39--46},
  publisher = {ACM},
  series = {RecSys '10},
  title = {Performance of Recommender Algorithms on Top-N Recommendation Tasks},
  url = {http://doi.acm.org/10.1145/1864708.1864721},
  year = 2010
}
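A minimal sketch of the accuracy-metric style of top-N evaluation this abstract advocates over RMSE: each held-out item is ranked among a sample of items the user has not rated, and a hit is counted if it lands in the top N. The helpers `unrated` and `score`, and the sample size, are assumptions for this sketch rather than the paper's exact protocol.

# Sketch: recall@N for a top-N recommendation task.
# unrated(user) -> items the user has not rated (hypothetical helper);
# score(user, item) -> predicted preference from any trained recommender.
import random

def recall_at_n(test_items, unrated, score, n=10, sample_size=1000):
    hits = 0
    for user, item in test_items:
        # Rank the held-out item among randomly sampled unrated items.
        candidates = random.sample(unrated(user), sample_size) + [item]
        candidates.sort(key=lambda i: score(user, i), reverse=True)
        if item in candidates[:n]:
            hits += 1
    return hits / len(test_items)

Because this metric depends only on the ranking of candidates, a model with lower RMSE can still score worse here, which is the mismatch the abstract highlights.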
@inproceedings{korner2010categorizers,
  abstract = {While recent research has advanced our understanding of the structure and dynamics of social tagging systems, we know little about (i) the underlying motivations for tagging (why users tag), and (ii) how they influence the properties of resulting tags and folksonomies. In this paper, we focus on problem (i) based on a distinction between two types of user motivations that we have identified in earlier work: Categorizers vs. Describers. To that end, we systematically define and evaluate a number of measures designed to discriminate between describers, i.e., users who use tags for describing resources, and categorizers, i.e., users who use tags for categorizing resources. Subsequently, we present empirical findings from qualitative and quantitative evaluations of the measures on real-world tagging behavior. In addition, we conducted a recommender evaluation in which we studied the effectiveness of each of the presented measures and found the measure based on tag content to be the most accurate in predicting user behavior, closely followed by a content-independent measure. The overall contribution of this paper is the presentation of empirical evidence that tagging motivation can be approximated with simple statistical measures. Our research is relevant for (a) designers of tagging systems aiming to better understand the motivations of their users and (b) researchers interested in studying the effects of users' tagging motivation on the properties of resulting tags and emergent structures in social tagging systems.},
  acmid = {1810645},
  address = {New York, NY, USA},
  author = {K\"{o}rner, Christian and Kern, Roman and Grahsl, Hans-Peter and Strohmaier, Markus},
  booktitle = {Proceedings of the 21st ACM Conference on Hypertext and Hypermedia},
  doi = {10.1145/1810617.1810645},
  isbn = {978-1-4503-0041-4},
  location = {Toronto, Ontario, Canada},
  numpages = {10},
  pages = {157--166},
  publisher = {ACM},
  series = {HT '10},
  title = {Of Categorizers and Describers: An Evaluation of Quantitative Measures for Tagging Motivation},
  url = {http://doi.acm.org/10.1145/1810617.1810645},
  year = 2010
}

@inproceedings{doerfel2013analysis,
  abstract = {Since the rise of collaborative tagging systems on the web, the tag recommendation task -- suggesting suitable tags to users of such systems while they add resources to their collection -- has been tackled. However, the (offline) evaluation of tag recommendation algorithms usually suffers from difficulties like the sparseness of the data or the cold-start problem for new resources or users. Previous studies therefore often used so-called post-cores (specific subsets of the original datasets) for their experiments. In this paper, we conduct a large-scale experiment in which we analyze different tag recommendation algorithms on different cores of three real-world datasets. We show that a recommender's performance depends on the particular core and explore correlations between performance on different cores.},
  acmid = {2507222},
  address = {New York, NY, USA},
  author = {Doerfel, Stephan and J\"{a}schke, Robert},
  booktitle = {Proceedings of the 7th ACM Conference on Recommender Systems},
  doi = {10.1145/2507157.2507222},
  isbn = {978-1-4503-2409-0},
  location = {Hong Kong, China},
  numpages = {4},
  pages = {343--346},
  publisher = {ACM},
  series = {RecSys '13},
  title = {An Analysis of Tag-Recommender Evaluation Procedures},
  url = {http://doi.acm.org/10.1145/2507157.2507222},
  year = 2013
}
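For illustration, a minimal sketch of computing a post-core (p-core) of a tagging dataset, the kind of subset the doerfel2013analysis abstract refers to: users, resources, and tags that occur too rarely are removed iteratively until all remaining ones occur at least k times. Representing the data as flat (user, resource, tag) assignments and the name `p_core` are simplifying assumptions of this sketch.

# Sketch: iterative p-core computation on (user, resource, tag) assignments.
from collections import Counter

def p_core(assignments, k):
    while True:
        users = Counter(u for u, r, t in assignments)
        resources = Counter(r for u, r, t in assignments)
        tags = Counter(t for u, r, t in assignments)
        # Keep only assignments whose user, resource, and tag all occur >= k times.
        kept = [(u, r, t) for u, r, t in assignments
                if users[u] >= k and resources[r] >= k and tags[t] >= k]
        if len(kept) == len(assignments):
            return kept  # fixed point reached: this is the k-core
        assignments = kept

Removing one rare entity can push another below the threshold, which is why the filter must be repeated until a fixed point is reached rather than applied once.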