@inproceedings{lefever-hoste-2010-construction,
title = "Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation",
author = "Lefever, Els and
Hoste, V{\'e}ronique",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf",
abstract = "Given the recent trend to evaluate the performance of word sense disambiguation systems in a more application-oriented set-up, we report on the construction of a multilingual benchmark data set for cross-lingual word sense disambiguation. The data set was created for a lexical sample of 25 English nouns, for which translations were retrieved in 5 languages, namely Dutch, German, French, Italian and Spanish. The corpus underlying the sense inventory was the parallel data set Europarl. The gold standard sense inventory was based on the automatic word alignments of the parallel corpus, which were manually verified. The resulting word alignments were used to perform a manual clustering of the translations over all languages in the parallel corpus. The inventory then served as input for the annotators of the sentences, who were asked to provide a maximum of three contextually relevant translations per language for a given focus word. The data set was released in the framework of the SemEval-2010 competition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lefever-hoste-2010-construction">
<titleInfo>
<title>Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Els</namePart>
<namePart type="family">Lefever</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Véronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Given the recent trend to evaluate the performance of word sense disambiguation systems in a more application-oriented set-up, we report on the construction of a multilingual benchmark data set for cross-lingual word sense disambiguation. The data set was created for a lexical sample of 25 English nouns, for which translations were retrieved in 5 languages, namely Dutch, German, French, Italian and Spanish. The corpus underlying the sense inventory was the parallel data set Europarl. The gold standard sense inventory was based on the automatic word alignments of the parallel corpus, which were manually verified. The resulting word alignments were used to perform a manual clustering of the translations over all languages in the parallel corpus. The inventory then served as input for the annotators of the sentences, who were asked to provide a maximum of three contextually relevant translations per language for a given focus word. The data set was released in the framework of the SemEval-2010 competition.</abstract>
<identifier type="citekey">lefever-hoste-2010-construction</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation
%A Lefever, Els
%A Hoste, Véronique
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F lefever-hoste-2010-construction
%X Given the recent trend to evaluate the performance of word sense disambiguation systems in a more application-oriented set-up, we report on the construction of a multilingual benchmark data set for cross-lingual word sense disambiguation. The data set was created for a lexical sample of 25 English nouns, for which translations were retrieved in 5 languages, namely Dutch, German, French, Italian and Spanish. The corpus underlying the sense inventory was the parallel data set Europarl. The gold standard sense inventory was based on the automatic word alignments of the parallel corpus, which were manually verified. The resulting word alignments were used to perform a manual clustering of the translations over all languages in the parallel corpus. The inventory then served as input for the annotators of the sentences, who were asked to provide a maximum of three contextually relevant translations per language for a given focus word. The data set was released in the framework of the SemEval-2010 competition.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf
Markdown (Informal)
[Construction of a Benchmark Data Set for Cross-lingual Word Sense Disambiguation](http://www.lrec-conf.org/proceedings/lrec2010/pdf/34_Paper.pdf) (Lefever & Hoste, LREC 2010)
ACL