@comment{LREC 2006 conference paper. Proper nouns and acronyms in title and booktitle are brace-protected as whole words so that sentence-casing styles keep their capitals.}
@inproceedings{mihaltz-pohl-2006-exploiting,
    title = "Exploiting Parallel Corpora for Supervised Word-Sense Disambiguation in {English}-{Hungarian} Machine Translation",
    author = "Mih{\'a}ltz, M{\'a}rton and
      Pohl, G{\'a}bor",
    editor = "Calzolari, Nicoletta and
      Choukri, Khalid and
      Gangemi, Aldo and
      Maegaard, Bente and
      Mariani, Joseph and
      Odijk, Jan and
      Tapias, Daniel",
    booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
    month = may,
    year = "2006",
    address = "Genoa, Italy",
    publisher = "European Language Resources Association (ELRA)",
    url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/649_pdf.pdf",
    abstract = "In this paper we present an experiment to automatically generate annotated training corpora for a supervised word sense disambiguation module operating in an English-Hungarian and a Hungarian-English machine translation system. Training examples for the WSD module of the MT system are produced by annotating ambiguous lexical items in the source language (words having several possible translations) with their proper target language translations. Since manually annotating training examples is very costly, we are experimenting with a method to produce examples automatically from parallel corpora. Our algorithm relies on monolingual and bilingual lexicons and dictionaries in addition to statistical methods in order to annotate examples extracted from a large English-Hungarian parallel corpus accurately aligned at sentence level. In the paper, we present an experiment with the English noun state, where we categorized the different occurrences in the Hunglish parallel corpus. For this noun, most of the examples were covered by multiword lexical items originating from our lexical sources.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record: the paper itself, with its host proceedings nested in relatedItem. -->
  <mods ID="mihaltz-pohl-2006-exploiting">
    <titleInfo>
      <title>Exploiting Parallel Corpora for Supervised Word-Sense Disambiguation in English-Hungarian Machine Translation</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Márton</namePart>
      <namePart type="family">Miháltz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gábor</namePart>
      <namePart type="family">Pohl</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2006-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aldo</namePart>
        <namePart type="family">Gangemi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Genoa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present an experiment to automatically generate annotated training corpora for a supervised word sense disambiguation module operating in an English-Hungarian and a Hungarian-English machine translation system. Training examples for the WSD module of the MT system are produced by annotating ambiguous lexical items in the source language (words having several possible translations) with their proper target language translations. Since manually annotating training examples is very costly, we are experimenting with a method to produce examples automatically from parallel corpora. Our algorithm relies on monolingual and bilingual lexicons and dictionaries in addition to statistical methods in order to annotate examples extracted from a large English-Hungarian parallel corpus accurately aligned at sentence level. In the paper, we present an experiment with the English noun state, where we categorized the different occurrences in the Hunglish parallel corpus. For this noun, most of the examples were covered by multiword lexical items originating from our lexical sources.</abstract>
    <identifier type="citekey">mihaltz-pohl-2006-exploiting</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/649_pdf.pdf</url>
    </location>
    <part>
      <date>2006-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploiting Parallel Corpora for Supervised Word-Sense Disambiguation in English-Hungarian Machine Translation
%A Miháltz, Márton
%A Pohl, Gábor
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F mihaltz-pohl-2006-exploiting
%X In this paper we present an experiment to automatically generate annotated training corpora for a supervised word sense disambiguation module operating in an English-Hungarian and a Hungarian-English machine translation system. Training examples for the WSD module of the MT system are produced by annotating ambiguous lexical items in the source language (words having several possible translations) with their proper target language translations. Since manually annotating training examples is very costly, we are experimenting with a method to produce examples automatically from parallel corpora. Our algorithm relies on monolingual and bilingual lexicons and dictionaries in addition to statistical methods in order to annotate examples extracted from a large English-Hungarian parallel corpus accurately aligned at sentence level. In the paper, we present an experiment with the English noun state, where we categorized the different occurrences in the Hunglish parallel corpus. For this noun, most of the examples were covered by multiword lexical items originating from our lexical sources.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/649_pdf.pdf
Markdown (Informal)
[Exploiting Parallel Corpora for Supervised Word-Sense Disambiguation in English-Hungarian Machine Translation](http://www.lrec-conf.org/proceedings/lrec2006/pdf/649_pdf.pdf) (Miháltz & Pohl, LREC 2006)
ACL