Publikationen

Yan, R.; Huang, C.; Tang, J.; Zhang, Y. & Li, X. (2012): To better stand on the shoulder of giants. In: Proceedings of the 12th ACM/IEEE-CS joint conference on Digital Libraries, New York, NY, USA. [Volltext]

% JCDL '12 paper on Future Influence Prediction (FIP).
% The abstract is kept LaTeX-safe: the percent sign is escaped and R^2 is set in math mode,
% so styles that print the abstract field do not break.
@inproceedings{yan2012better,
  author = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  title = {To better stand on the shoulder of giants},
  booktitle = {Proceedings of the 12th ACM/IEEE-CS joint conference on Digital Libraries},
  series = {JCDL '12},
  publisher = {ACM},
  address = {New York, NY, USA},
  year = {2012},
  pages = {51--60},
  url = {http://doi.acm.org/10.1145/2232817.2232831},
  doi = {10.1145/2232817.2232831},
  isbn = {978-1-4503-1154-0},
  keywords = {scientometrics, sota, analysis, citation},
  abstract = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination ($R^2$). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).}
  }

% Second record of the same paper as `yan2012better` (identical DOI), kept under its own key
% because it carries a different keyword set.
% The abstract is kept LaTeX-safe: the percent sign is escaped and R^2 is set in math mode.
@inproceedings{Yan:2012:BSS:2232817.2232831,
  author = {Yan, Rui and Huang, Congrui and Tang, Jie and Zhang, Yan and Li, Xiaoming},
  title = {To better stand on the shoulder of giants},
  booktitle = {Proceedings of the 12th ACM/IEEE-CS joint conference on Digital Libraries},
  series = {JCDL '12},
  publisher = {ACM},
  address = {New York, NY, USA},
  year = {2012},
  pages = {51--60},
  url = {http://doi.acm.org/10.1145/2232817.2232831},
  doi = {10.1145/2232817.2232831},
  isbn = {978-1-4503-1154-0},
  keywords = {sota, prediction, citation, toread},
  abstract = {Usually scientists breed research ideas inspired by previous publications, but they are unlikely to follow all publications in the unbounded literature collection. The volume of literature keeps on expanding extremely fast, whilst not all papers contribute equal impact to the academic society. Being aware of potentially influential literature would put one in an advanced position in choosing important research references. Hence, estimation of potential influence is of great significance. We study a challenging problem of identifying potentially influential literature. We examine a set of hypotheses on what are the fundamental characteristics for highly cited papers and find some interesting patterns. Based on these observations, we learn to identify potentially influential literature via Future Influence Prediction (FIP), which aims to estimate the future influence of literature. The system takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination ($R^2$). Experimental results on a real-large data set show a mean average predictive performance of 83.6\% measured in $R^2$. We apply the learned model to the application of bibliography recommendation and obtain prominent performance improvement in terms of Mean Average Precision (MAP).}
  }

2011

Yan, R.; Tang, J.; Liu, X.; Shan, D. & Li, X. (2011): Citation count prediction: learning to estimate future citations for literature. In: Proceedings of the 20th ACM international conference on Information and knowledge management, New York, NY, USA. [Volltext]

% CIKM '11 paper on Citation Count Prediction (CCP).
@inproceedings{yan2011citation,
  author    = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  title     = {Citation count prediction: learning to estimate future citations for literature},
  booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management},
  series    = {CIKM '11},
  pages     = {1247--1252},
  year      = {2011},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-4503-0717-8},
  doi       = {10.1145/2063576.2063757},
  url       = {http://doi.acm.org/10.1145/2063576.2063757},
  keywords  = {scientometrics, sota, prediction, citation},
  abstract  = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.},
}

% Second record of the same paper as `yan2011citation` (identical DOI) with a different keyword set.
% The key carries a `b` suffix because citation keys must be unique: a repeated key makes
% BibTeX report "Repeated entry" and drop the record. \cite{yan2011citation} still resolves
% to the first record.
@inproceedings{yan2011citationb,
  author = {Yan, Rui and Tang, Jie and Liu, Xiaobing and Shan, Dongdong and Li, Xiaoming},
  title = {Citation count prediction: learning to estimate future citations for literature},
  booktitle = {Proceedings of the 20th ACM international conference on Information and knowledge management},
  series = {CIKM '11},
  publisher = {ACM},
  address = {New York, NY, USA},
  year = {2011},
  pages = {1247--1252},
  url = {http://doi.acm.org/10.1145/2063576.2063757},
  doi = {10.1145/2063576.2063757},
  isbn = {978-1-4503-0717-8},
  keywords = {scientometrics, prediction, citation},
  abstract = {In most of the cases, scientists depend on previous literature which is relevant to their research fields for developing new ideas. However, it is not wise, nor possible, to track all existed publications because the volume of literature collection grows extremely fast. Therefore, researchers generally follow, or cite merely a small proportion of publications which they are interested in. For such a large collection, it is rather interesting to forecast which kind of literature is more likely to attract scientists' response. In this paper, we use the citations as a measurement for the popularity among researchers and study the interesting problem of Citation Count Prediction (CCP) to examine the characteristics for popularity. Estimation of possible popularity is of great significance and is quite challenging. We have utilized several features of fundamental characteristics for those papers that are highly cited and have predicted the popularity degree of each literature in the future. We have implemented a system which takes a series of features of a particular publication as input and produces as output the estimated citation counts of that article after a given time period. We consider several regression models to formulate the learning process and evaluate their performance based on the coefficient of determination (R-square). Experimental results on a real-large data set show that the best predictive model achieves a mean average predictive performance of 0.740 measured in R-square, which significantly outperforms several alternative algorithms.}
  }