@inproceedings{habash-metsky-2008-automatic,
title = "Automatic Learning of Morphological Variations for Handling Out-of-Vocabulary Terms in {U}rdu-{E}nglish {MT}",
author = "Habash, Nizar and
Metsky, Hayden",
booktitle = "Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers",
month = oct # " 21-25",
year = "2008",
address = "Waikiki, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2008.amta-papers.9",
pages = "107--116",
abstract = "We present an approach for online handling of Out-of-Vocabulary (OOV) terms in Urdu-English MT. Since Urdu is morphologically richer than English, we expect a large portion of the OOV terms to be Urdu morphological variations that are irrelevant to English. We describe an approach to automatically learn English-irrelevant (target-irrelevant) Urdu (source) morphological variation rules from standard phrase tables. These rules are learned in an unsupervised (or lightly supervised) manner by exploiting redundancy in Urdu and collocation with English translations. We use these rules to hypothesize in-vocabulary alternatives to the OOV terms. Our results show that we reduce the OOV rate from a standard baseline average of 2.6{\%} to an average of 0.3{\%} (or 89{\%} relative decrease). We also increase the BLEU score by 0.45 (absolute) and 2.8{\%} (relative) on a standard test set. A manual error analysis shows that 28{\%} of handled OOV cases produce acceptable translations in context.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="habash-metsky-2008-automatic">
<titleInfo>
<title>Automatic Learning of Morphological Variations for Handling Out-of-Vocabulary Terms in Urdu-English MT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hayden</namePart>
<namePart type="family">Metsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-oct 21-25</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Waikiki, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an approach for online handling of Out-of-Vocabulary (OOV) terms in Urdu-English MT. Since Urdu is morphologically richer than English, we expect a large portion of the OOV terms to be Urdu morphological variations that are irrelevant to English. We describe an approach to automatically learn English-irrelevant (target-irrelevant) Urdu (source) morphological variation rules from standard phrase tables. These rules are learned in an unsupervised (or lightly supervised) manner by exploiting redundancy in Urdu and collocation with English translations. We use these rules to hypothesize in-vocabulary alternatives to the OOV terms. Our results show that we reduce the OOV rate from a standard baseline average of 2.6% to an average of 0.3% (or 89% relative decrease). We also increase the BLEU score by 0.45 (absolute) and 2.8% (relative) on a standard test set. A manual error analysis shows that 28% of handled OOV cases produce acceptable translations in context.</abstract>
<identifier type="citekey">habash-metsky-2008-automatic</identifier>
<location>
<url>https://aclanthology.org/2008.amta-papers.9</url>
</location>
<part>
<date>2008-oct 21-25</date>
<extent unit="page">
<start>107</start>
<end>116</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Learning of Morphological Variations for Handling Out-of-Vocabulary Terms in Urdu-English MT
%A Habash, Nizar
%A Metsky, Hayden
%S Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2008
%8 oct 21-25
%I Association for Machine Translation in the Americas
%C Waikiki, USA
%F habash-metsky-2008-automatic
%X We present an approach for online handling of Out-of-Vocabulary (OOV) terms in Urdu-English MT. Since Urdu is morphologically richer than English, we expect a large portion of the OOV terms to be Urdu morphological variations that are irrelevant to English. We describe an approach to automatically learn English-irrelevant (target-irrelevant) Urdu (source) morphological variation rules from standard phrase tables. These rules are learned in an unsupervised (or lightly supervised) manner by exploiting redundancy in Urdu and collocation with English translations. We use these rules to hypothesize in-vocabulary alternatives to the OOV terms. Our results show that we reduce the OOV rate from a standard baseline average of 2.6% to an average of 0.3% (or 89% relative decrease). We also increase the BLEU score by 0.45 (absolute) and 2.8% (relative) on a standard test set. A manual error analysis shows that 28% of handled OOV cases produce acceptable translations in context.
%U https://aclanthology.org/2008.amta-papers.9
%P 107-116
Markdown (Informal)
[Automatic Learning of Morphological Variations for Handling Out-of-Vocabulary Terms in Urdu-English MT](https://aclanthology.org/2008.amta-papers.9) (Habash & Metsky, AMTA 2008)
ACL