@inproceedings{michelbacher-etal-2010-building,
title = "Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure",
author = {Michelbacher, Lukas and
Laws, Florian and
Dorow, Beate and
Heid, Ulrich and
Sch{\"u}tze, Hinrich},
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf",
abstract = "The Internet is an ever growing source of information stored in documents of different languages. Hence, cross-lingual resources are needed for more and more NLP applications. This paper presents (i) a graph-based method for creating one such resource and (ii) a resource created using the method, a cross-lingual relatedness thesaurus. Given a word in one language, the thesaurus suggests words in a second language that are semantically related. The method requires two monolingual corpora and a basic dictionary. Our general approach is to build two monolingual word graphs, with nodes representing words and edges representing linguistic relations between words. A bilingual dictionary containing basic vocabulary provides seed translations relating nodes from both graphs. We then use an inter-graph node-similarity algorithm to discover related words. Evaluation with three human judges revealed that 49{\%} of the English and 57{\%} of the German words discovered by our method are semantically related to the target words. We publish two resources in conjunction with this paper. First, noun coordinations extracted from the German and English Wikipedias. Second, the cross-lingual relatedness thesaurus which can be used in experiments involving interactive cross-lingual query expansion.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="michelbacher-etal-2010-building">
<titleInfo>
<title>Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lukas</namePart>
<namePart type="family">Michelbacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Laws</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beate</namePart>
<namePart type="family">Dorow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Heid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hinrich</namePart>
<namePart type="family">Schütze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Internet is an ever growing source of information stored in documents of different languages. Hence, cross-lingual resources are needed for more and more NLP applications. This paper presents (i) a graph-based method for creating one such resource and (ii) a resource created using the method, a cross-lingual relatedness thesaurus. Given a word in one language, the thesaurus suggests words in a second language that are semantically related. The method requires two monolingual corpora and a basic dictionary. Our general approach is to build two monolingual word graphs, with nodes representing words and edges representing linguistic relations between words. A bilingual dictionary containing basic vocabulary provides seed translations relating nodes from both graphs. We then use an inter-graph node-similarity algorithm to discover related words. Evaluation with three human judges revealed that 49% of the English and 57% of the German words discovered by our method are semantically related to the target words. We publish two resources in conjunction with this paper. First, noun coordinations extracted from the German and English Wikipedias. Second, the cross-lingual relatedness thesaurus which can be used in experiments involving interactive cross-lingual query expansion.</abstract>
<identifier type="citekey">michelbacher-etal-2010-building</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure
%A Michelbacher, Lukas
%A Laws, Florian
%A Dorow, Beate
%A Heid, Ulrich
%A Schütze, Hinrich
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F michelbacher-etal-2010-building
%X The Internet is an ever growing source of information stored in documents of different languages. Hence, cross-lingual resources are needed for more and more NLP applications. This paper presents (i) a graph-based method for creating one such resource and (ii) a resource created using the method, a cross-lingual relatedness thesaurus. Given a word in one language, the thesaurus suggests words in a second language that are semantically related. The method requires two monolingual corpora and a basic dictionary. Our general approach is to build two monolingual word graphs, with nodes representing words and edges representing linguistic relations between words. A bilingual dictionary containing basic vocabulary provides seed translations relating nodes from both graphs. We then use an inter-graph node-similarity algorithm to discover related words. Evaluation with three human judges revealed that 49% of the English and 57% of the German words discovered by our method are semantically related to the target words. We publish two resources in conjunction with this paper. First, noun coordinations extracted from the German and English Wikipedias. Second, the cross-lingual relatedness thesaurus which can be used in experiments involving interactive cross-lingual query expansion.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf
Markdown (Informal)
[Building a Cross-lingual Relatedness Thesaurus using a Graph Similarity Measure](http://www.lrec-conf.org/proceedings/lrec2010/pdf/499_Paper.pdf) (Michelbacher et al., LREC 2010)
ACL