% Bibliography database: entity linking, record deduplication, folksonomies,
% and community detection in networks.
%
% Conventions used below: one field per line, braces around all values except
% month macros, double hyphen in page ranges, abstract as the last field.
% The interhash / intrahash fields look like BibSonomy export identifiers
% (TODO confirm); they are not standard BibTeX fields and are kept verbatim.

% Workshop paper, CEUR-WS volume 906.
@inproceedings{pereiranunes2012entities,
  author      = {Pereira Nunes, Bernardo and Kawase, Ricardo and Dietze, Stefan and Taibi, Davide and Casanova, Marco Antonio and Nejdl, Wolfgang},
  editor      = {Rizzo, Giuseppe and Mendes, Pablo and Charton, Eric and Hellmann, Sebastian and Kalyanpur, Aditya},
  title       = {Can Entities be Friends?},
  booktitle   = {Proceedings of the Web of Linked Entities Workshop in conjunction with the 11th International Semantic Web Conference},
  series      = {CEUR-WS.org},
  volume      = {906},
  pages       = {45--57},
  month       = nov,
  year        = {2012},
  issn        = {1613-0073},
  url         = {http://ceur-ws.org/Vol-906/paper6.pdf},
  urn         = {urn:nbn:de:0074-906-7},
  institution = {Bernardo Pereira Nunes, Ricardo Kawase, Stefan Dietze, Davide Taibi, Marco Antonio Casanova, Wolfgang Nejdl},
  interhash   = {8f969b917268449792c130dcbab06e69},
  intrahash   = {f22943239296ada0dfa11c30c5b4904a},
  abstract    = {The richness of the (Semantic) Web lies in its ability to link related resources as well as data across the Web. However, while relations within particular datasets are often well defined, links between disparate datasets and corpora of Web resources are rare. The increasingly widespread use of cross-domain reference datasets, such as Freebase and DBpedia for annotating and enriching datasets as well as document corpora, opens up opportunities to exploit their inherent semantics to uncover semantic relationships between disparate resources. In this paper, we present an approach to uncover relationships between disparate entities by analyzing the graphs of used reference datasets. We adapt a relationship assessment methodology from social network theory to measure the connectivity between entities in reference datasets and exploit these measures to identify correlated Web resources. Finally, we present an evaluation of our approach using the publicly available datasets Bibsonomy and USAToday.},
}

% Journal article.
@article{tejada2001learning,
  author    = {Tejada, Sheila and Knoblock, Craig A. and Minton, Steven},
  title     = {Learning object identification rules for information integration},
  journal   = {Information Systems},
  volume    = {26},
  number    = {8},
  pages     = {607--633},
  month     = dec,
  year      = {2001},
  doi       = {10.1016/S0306-4379(01)00042-4},
  issn      = {0306-4379},
  url       = {http://www.sciencedirect.com/science/article/pii/S0306437901000424},
  interhash = {f9f59187b0397a0fbe1e558dfb4ad9cf},
  intrahash = {5ad46801d602408ce271276f452263a9},
  abstract  = {When integrating information from multiple websites, the same data objects can exist in inconsistent text formats across sites, making it difficult to identify matching objects using exact text match. We have developed an object identification system called Active Atlas, which compares the objects’ shared attributes in order to identify matching objects. Certain attributes are more important for deciding if a mapping should exist between two objects. Previous methods of object identification have required manual construction of object identification rules or mapping rules for determining the mappings between objects. This manual process is time consuming and error-prone. In our approach, Active Atlas learns to tailor mapping rules, through limited user input, to a specific application domain. The experimental results demonstrate that we achieve higher accuracy and require less user involvement than previous methods across various application domains.},
}

% Conference paper: the venue is a proceedings volume with editors, so it is
% recorded as inproceedings with booktitle.
@inproceedings{borges2011classification,
  author    = {Borges, Eduardo N. and Becker, Karin and Heuser, Carlos A. and Galante, Renata},
  editor    = {White, Bebo and Isaías, Pedro and Santoro, Flávia Maria},
  title     = {A Classification-based Approach for Bibliographic Metadata Deduplication},
  booktitle = {Proceedings of the IADIS International Conference WWW/Internet 2011},
  pages     = {221--228},
  year      = {2011},
  url       = {http://www.eduardo.c3.furg.br/arquivos/download/www-internet2011.pdf},
  interhash = {ca7720210214f632758211735154eea2},
  intrahash = {8f87206e413c2c632b5c633f484fcbe2},
  abstract  = {Digital libraries of scientific articles describe them using a set of metadata, including bibliographic references. These references can be represented by several formats and styles. Considerable content variations can occur in some metadata fields such as title, author names and publication venue. Besides, it is quite common to find references that omit same metadata fields such as page numbers. Duplicate entries influence the quality of digital library services once they need to be appropriately identified and treated. This paper presents a comparative analysis among different data classification algorithms used to identify duplicated bibliographic metadata records. We have investigated the discovered patterns by comparing the rules and the decision tree with the heuristics adopted in a previous work. Our experiments show that the combination of specific-purpose similarity functions previously proposed and classification algorithms represent an improvement up to 12\% when compared to the experiments using our original approach.},
}

% Conference paper; the system name in the title is brace-protected so that
% sentence-casing styles keep its capitalisation.
@inproceedings{mitzlaff2010visit,
  author    = {Mitzlaff, Folke and Benz, Dominik and Stumme, Gerd and Hotho, Andreas},
  title     = {Visit me, click me, be my friend: an analysis of evidence networks of user relationships in {BibSonomy}},
  booktitle = {HT '10: Proceedings of the 21st ACM Conference on Hypertext and Hypermedia},
  pages     = {265--270},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Toronto, Ontario, Canada},
  year      = {2010},
  doi       = {10.1145/1810617.1810664},
  isbn      = {978-1-4503-0041-4},
  url       = {http://portal.acm.org/citation.cfm?id=1810617.1810664},
  interhash = {5584c4c57fcd8eb4663df8b114bcf09c},
  intrahash = {6628bf43e3834ba147a22992f2f534e9},
  abstract  = {The ongoing spread of online social networking and sharing sites has reshaped the way how people interact with each other. Analyzing the relatedness of different users within the resulting large populations of these systems plays an important role for tasks like user recommendation or community detection. Algorithms in these fields typically face the problem that explicit user relationships (like friend lists) are often very sparse. Surprisingly, implicit evidences (like click logs) of user relations have hardly been considered to this end. Based on our long-time experience with running BibSonomy [4], we identify in this paper different evidence networks of user relationships in our system. We broadly classify each network based on whether the links are explicitly established by the users (e.g., friendship or group membership) or accrue implicitly in the running system (e.g., when user u copies an entry of user v). We systematically analyze structural properties of these networks and whether topological closeness (in terms of the length of shortest paths) coincides with semantic similarity between users.},
}

% Poster paper in an edited proceedings volume.
@inproceedings{voss2009mapping,
  author       = {Voss, Jakob and Hotho, Andreas and Jäschke, Robert},
  editor       = {Kuhlen, Rainer},
  title        = {Mapping Bibliographic Records with Bibliographic Hash Keys},
  booktitle    = {Information: Droge, Ware oder Commons?},
  series       = {Proceedings of the ISI},
  publisher    = {Verlag Werner Hülsbusch},
  organization = {Hochschulverband Informationswissenschaft},
  year         = {2009},
  url          = {http://eprints.rclis.org/15953/},
  interhash    = {6e394e459d11dfa17f5d4cf1b8dd81c3},
  intrahash    = {01f6fe57f46e4b92fe02869341efdd8d},
  abstract     = {This poster presents a set of hash keys for bibliographic records called bibkeys. Unlike other methods of duplicate detection, bibkeys can directly be calculated from a set of basic metadata fields (title, authors/editors, year). It is shown how bibkeys are used to map similar bibliographic records in BibSonomy and among distributed library catalogs and other distributed databases.},
}

% Conference paper in a numbered series volume. The doi field carries the
% same identifier as the resolver URL stored in the ee field.
@inproceedings{hotho2006trend,
  author    = {Hotho, Andreas and Jäschke, Robert and Schmitz, Christoph and Stumme, Gerd},
  editor    = {Avrithis, Yannis S. and Kompatsiaris, Yiannis and Staab, Steffen and O'Connor, Noel E.},
  title     = {Trend Detection in Folksonomies},
  booktitle = {Proc. First International Conference on Semantics And Digital Media Technology (SAMT)},
  series    = {Lecture Notes in Computer Science},
  volume    = {4306},
  pages     = {56--70},
  publisher = {Springer},
  address   = {Heidelberg},
  date      = {2006-12-13},
  month     = dec,
  year      = {2006},
  doi       = {10.1007/11930334_5},
  ee        = {http://dx.doi.org/10.1007/11930334_5},
  isbn      = {3-540-49335-2},
  url       = {http://www.kde.cs.uni-kassel.de/stumme/papers/2006/hotho2006trend.pdf},
  vgwort    = {27},
  interhash = {227be738c5cea57530d592463fd09abd},
  intrahash = {42cda5911e901eadd0ac6a106a6aa1dc},
  abstract  = {As the number of resources on the web exceeds by far the number of documents one can track, it becomes increasingly difficult to remain up to date on ones own areas of interest. The problem becomes more severe with the increasing fraction of multimedia data, from which it is difficult to extract some conceptual description of their contents. One way to overcome this problem are social bookmark tools, which are rapidly emerging on the web. In such systems, users are setting up lightweight conceptual structures called folksonomies, and overcome thus the knowledge acquisition bottleneck. As more and more people participate in the effort, the use of a common vocabulary becomes more and more stable. We present an approach for discovering topic-specific trends within folksonomies. It is based on a differential adaptation of the PageRank algorithm to the triadic hypergraph structure of a folksonomy. The approach allows for any kind of data, as it does not rely on the internal structure of the documents. In particular, this allows to consider different data types in the same analysis step. We run experiments on a large-scale real-world snapshot of a social bookmarking system.},
}

% Journal article identified by an article number rather than a page range;
% the pages value is the article number that ends the DOI.
@article{barber2007mac,
  author    = {Barber, M. J.},
  title     = {Modularity and community detection in bipartite networks},
  journal   = {Physical Review E},
  volume    = {76},
  number    = {6},
  pages     = {066102},
  year      = {2007},
  doi       = {10.1103/PhysRevE.76.066102},
  url       = {http://arxiv.org/abs/arXiv:0707.1616},
  interhash = {e1d9f528c49b34ff4a05b2b0060bd653},
  intrahash = {61f9d5839845d5d8fa1883a46a2f7744},
  abstract  = {The modularity of a network quantifies the extent, relative to a null model network, to which vertices cluster into community groups. We define a null model appropriate for bipartite networks, and use it to define a bipartite modularity. The bipartite modularity is presented in terms of a modularity matrix B; some key properties of the eigenspectrum of B are identified and used to describe an algorithm for identifying modules in bipartite networks. The algorithm is based on the idea that the modules in the two parts of the network are dependent, with each part mutually being used to induce the vertices for the other part into the modules. We apply the algorithm to real-world network data, showing that the algorithm successfully identifies the modular structure of bipartite networks.},
}

% Journal article identified by an article number; the value is braced so the
% leading zero survives parsers that read bare digits as an integer.
@article{guimera2007mib,
  author    = {Guimer{\`a}, R. and Sales-Pardo, M. and Amaral, L. A. N.},
  title     = {Module identification in bipartite and directed networks},
  journal   = {Physical Review E},
  volume    = {76},
  number    = {3 Pt 2},
  pages     = {036102},
  publisher = {NIH Public Access},
  year      = {2007},
  doi       = {10.1103/PhysRevE.76.036102},
  url       = {http://arxiv.org/abs/physics/0701151},
  interhash = {a87821c7c8e7d5ca89cb369e6215a0f3},
  intrahash = {6145a42fe04aee556fa7a68c7cea7db3},
  abstract  = {Modularity is one of the most prominent properties of real-world complex networks. Here, we address the issue of module identification in two important classes of networks: bipartite networks and directed unipartite networks. Nodes in bipartite networks are divided into two non-overlapping sets, and the links must have one end node from each set. Directed unipartite networks only have one type of nodes, but links have an origin and an end. We show that directed unipartite networks can be conviniently represented as bipartite networks for module identification purposes. We report a novel approach especially suited for module detection in bipartite networks, and define a set of random networks that enable us to validate the new approach.},
}

% Workshop paper. NOTE(review): the title spelling is kept exactly as
% recorded; verify against the published paper before changing it.
@inproceedings{Detecting_Commmunities_via_Simultaneous_Clustering_of_Graphs_and_Folksonomies,
  author    = {Java, Akshay and Joshi, Anupam and Finin, Tim},
  title     = {Detecting Commmunities via Simultaneous Clustering of Graphs and Folksonomies},
  booktitle = {WebKDD 2008 Workshop on Web Mining and Web Usage Analysis},
  month     = aug,
  year      = {2008},
  note      = {To Appear},
  interhash = {acfec953843b168e61e2e167e29b4c3d},
  intrahash = {645abd6b3191a2a6e844d7542651ed1c},
}

% Journal article identified by an article number (braced to keep the
% leading zero).
@article{duch-2005-72,
  author    = {Duch, J. and Arenas, A.},
  title     = {Community detection in complex networks using Extremal Optimization},
  journal   = {Physical Review E},
  volume    = {72},
  pages     = {027104},
  year      = {2005},
  url       = {http://www.citebase.org/abstract?id=oai:arXiv.org:cond-mat/0501368},
  interhash = {2e37e9b6a0f76e94125990a47cd287f3},
  intrahash = {36d905c5223e5516db9d08eb3e0bc9fc},
  abstract  = {We propose a novel method to find the community structure in complex networks based on an extremal optimization of the value of modularity. The method outperforms the optimal modularity found by the existing algorithms in the literature. We present the results of the algorithm for computer simulated and real networks and compare them with other approaches. The efficiency and accuracy of the method make it feasible to be used for the accurate identification of community structure in large complex networks.},
}