@inproceedings{oliver-climent-2014-automatic,
title = "Automatic creation of {W}ord{N}ets from parallel corpora",
author = "Oliver, Antoni and
Climent, Salvador",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/121_Paper.pdf",
pages = "1112--1116",
abstract = "In this paper we present the evaluation results for the creation of WordNets for five languages (Spanish, French, German, Italian and Portuguese) using an approach based on parallel corpora. We have used three very large parallel corpora for our experiments: DGT-TM, EMEA and ECB. The English part of each corpus is semantically tagged using Freeling and UKB. After this step, the process of WordNet creation is converted into a word alignment problem, where we want to alignWordNet synsets in the English part of the corpus with lemmata on the target language part of the corpus. The word alignment algorithm used in these experiments is a simple most frequent translation algorithm implemented into the WN-Toolkit. The obtained precision values are quite satisfactory, but the overall number of extracted synset-variant pairs is too low, leading into very poor recall values. In the conclusions, the use of more advanced word alignment algorithms, such as Giza++, Fast Align or Berkeley aligner is suggested.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oliver-climent-2014-automatic">
<titleInfo>
<title>Automatic creation of WordNets from parallel corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoni</namePart>
<namePart type="family">Oliver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salvador</namePart>
<namePart type="family">Climent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present the evaluation results for the creation of WordNets for five languages (Spanish, French, German, Italian and Portuguese) using an approach based on parallel corpora. We have used three very large parallel corpora for our experiments: DGT-TM, EMEA and ECB. The English part of each corpus is semantically tagged using Freeling and UKB. After this step, the process of WordNet creation is converted into a word alignment problem, where we want to alignWordNet synsets in the English part of the corpus with lemmata on the target language part of the corpus. The word alignment algorithm used in these experiments is a simple most frequent translation algorithm implemented into the WN-Toolkit. The obtained precision values are quite satisfactory, but the overall number of extracted synset-variant pairs is too low, leading into very poor recall values. In the conclusions, the use of more advanced word alignment algorithms, such as Giza++, Fast Align or Berkeley aligner is suggested.</abstract>
<identifier type="citekey">oliver-climent-2014-automatic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/121_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>1112</start>
<end>1116</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic creation of WordNets from parallel corpora
%A Oliver, Antoni
%A Climent, Salvador
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F oliver-climent-2014-automatic
%X In this paper we present the evaluation results for the creation of WordNets for five languages (Spanish, French, German, Italian and Portuguese) using an approach based on parallel corpora. We have used three very large parallel corpora for our experiments: DGT-TM, EMEA and ECB. The English part of each corpus is semantically tagged using Freeling and UKB. After this step, the process of WordNet creation is converted into a word alignment problem, where we want to alignWordNet synsets in the English part of the corpus with lemmata on the target language part of the corpus. The word alignment algorithm used in these experiments is a simple most frequent translation algorithm implemented into the WN-Toolkit. The obtained precision values are quite satisfactory, but the overall number of extracted synset-variant pairs is too low, leading into very poor recall values. In the conclusions, the use of more advanced word alignment algorithms, such as Giza++, Fast Align or Berkeley aligner is suggested.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/121_Paper.pdf
%P 1112-1116
Markdown (Informal)
[Automatic creation of WordNets from parallel corpora](http://www.lrec-conf.org/proceedings/lrec2014/pdf/121_Paper.pdf) (Oliver & Climent, LREC 2014)
ACL
- Antoni Oliver and Salvador Climent. 2014. Automatic creation of WordNets from parallel corpora. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 1112–1116, Reykjavik, Iceland. European Language Resources Association (ELRA).