@inproceedings{reeder-loehr-1998-finding,
title = "Finding the right words: an analysis of not-translated words in machine translation",
author = "Reeder, Flo and
Loehr, Dan",
editor = "Farwell, David and
Gerber, Laurie and
Hovy, Eduard",
booktitle = "Proceedings of the Third Conference of the Association for Machine Translation in the Americas: Technical Papers",
month = oct # " 28-31",
year = "1998",
address = "Langhorne, PA, USA",
publisher = "Springer",
url = "https://link.springer.com/chapter/10.1007/3-540-49478-2_32",
pages = "356--363",
abstract = "A not-translated word (NTW) is a token which a machine translation (MT) system is unable to translate, leaving it untranslated in the output. The number of not-translated words in a document is used as one measure in the evaluation of MT systems. Many MT developers agree that in order to reduce the number of NTWs in their systems, designers must increase the size or coverage of the lexicon to include these untranslated tokens, so that the system can handle them in future processing. While we accept this method for enhancing MT capabilities, in assessing the nature of NTWs in real-world documents, we found surprising results. Our study looked at the NTW output from two commercially available MT systems (Systran and Globalink) and found that lexical coverage played a relatively small role in the words marked as not translated. In fact, 45{\%} of the tokens in the list failed to translate for reasons other than that they were valid source language words not included in the MT lexicon. For instance, e-mail addresses, words already in the target language and acronyms were marked as not-translated words. This paper presents our analysis of NTWs and uses these results to argue that in addition to lexicon enhancement, MT systems could benefit from more sophisticated pre- and postprocessing of real-world documents in order to weed out such NTWs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="reeder-loehr-1998-finding">
<titleInfo>
<title>Finding the right words: an analysis of not-translated words in machine translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Flo</namePart>
<namePart type="family">Reeder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Loehr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>1998-oct 28-31</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Conference of the Association for Machine Translation in the Americas: Technical Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Farwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurie</namePart>
<namePart type="family">Gerber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Springer</publisher>
<place>
<placeTerm type="text">Langhorne, PA, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A not-translated word (NTW) is a token which a machine translation (MT) system is unable to translate, leaving it untranslated in the output. The number of not-translated words in a document is used as one measure in the evaluation of MT systems. Many MT developers agree that in order to reduce the number of NTWs in their systems, designers must increase the size or coverage of the lexicon to include these untranslated tokens, so that the system can handle them in future processing. While we accept this method for enhancing MT capabilities, in assessing the nature of NTWs in real-world documents, we found surprising results. Our study looked at the NTW output from two commercially available MT systems (Systran and Globalink) and found that lexical coverage played a relatively small role in the words marked as not translated. In fact, 45% of the tokens in the list failed to translate for reasons other than that they were valid source language words not included in the MT lexicon. For instance, e-mail addresses, words already in the target language and acronyms were marked as not-translated words. This paper presents our analysis of NTWs and uses these results to argue that in addition to lexicon enhancement, MT systems could benefit from more sophisticated pre- and postprocessing of real-world documents in order to weed out such NTWs.</abstract>
<identifier type="citekey">reeder-loehr-1998-finding</identifier>
<location>
<url>https://link.springer.com/chapter/10.1007/3-540-49478-2_32</url>
</location>
<part>
<date>1998-oct 28-31</date>
<extent unit="page">
<start>356</start>
<end>363</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Finding the right words: an analysis of not-translated words in machine translation
%A Reeder, Flo
%A Loehr, Dan
%Y Farwell, David
%Y Gerber, Laurie
%Y Hovy, Eduard
%S Proceedings of the Third Conference of the Association for Machine Translation in the Americas: Technical Papers
%D 1998
%8 oct 28-31
%I Springer
%C Langhorne, PA, USA
%F reeder-loehr-1998-finding
%X A not-translated word (NTW) is a token which a machine translation (MT) system is unable to translate, leaving it untranslated in the output. The number of not-translated words in a document is used as one measure in the evaluation of MT systems. Many MT developers agree that in order to reduce the number of NTWs in their systems, designers must increase the size or coverage of the lexicon to include these untranslated tokens, so that the system can handle them in future processing. While we accept this method for enhancing MT capabilities, in assessing the nature of NTWs in real-world documents, we found surprising results. Our study looked at the NTW output from two commercially available MT systems (Systran and Globalink) and found that lexical coverage played a relatively small role in the words marked as not translated. In fact, 45% of the tokens in the list failed to translate for reasons other than that they were valid source language words not included in the MT lexicon. For instance, e-mail addresses, words already in the target language and acronyms were marked as not-translated words. This paper presents our analysis of NTWs and uses these results to argue that in addition to lexicon enhancement, MT systems could benefit from more sophisticated pre- and postprocessing of real-world documents in order to weed out such NTWs.
%U https://link.springer.com/chapter/10.1007/3-540-49478-2_32
%P 356-363
Markdown (Informal)
[Finding the right words: an analysis of not-translated words in machine translation](https://link.springer.com/chapter/10.1007/3-540-49478-2_32) (Reeder & Loehr, AMTA 1998)
ACL