@inproceedings{parton-etal-2012-lost,
title = "Lost {\&} Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval",
author = "Parton, Kristen and
Habash, Nizar and
McKeown, Kathleen",
booktitle = "Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers",
month = oct # " 28-" # nov # " 1",
year = "2012",
address = "San Diego, California, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2012.amta-papers.12",
abstract = "In an ideal cross-lingual information retrieval (CLIR) system, a user query would generate a search over documents in a different language and the relevant results would be presented in the user{'}s language. In practice, CLIR systems are typically evaluated by judging result relevance in the document language, to factor out the effects of translating the results using machine translation (MT). In this paper, we investigate the influence of four different approaches for integrating MT and CLIR on both retrieval accuracy and user judgment of relevancy. We create a corpus with relevance judgments for both human and machine translated results, and use it to quantify the effect that MT quality has on end-to-end relevance. We find that MT errors result in a 16-39{\%} decrease in mean average precision over the ground truth system that uses human translations. MT errors also caused relevant sentences to appear irrelevant {--} 5-19{\%} of sentences were relevant in human translation, but were judged irrelevant in MT. To counter this degradation, we present two hybrid retrieval models and two automatic MT post-editing techniques and show that these approaches substantially mitigate the errors and improve the end-to-end relevance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="parton-etal-2012-lost">
<titleInfo>
<title>Lost &amp; Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristen</namePart>
<namePart type="family">Parton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathleen</namePart>
<namePart type="family">McKeown</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="iso8601">2012-10-28/2012-11-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In an ideal cross-lingual information retrieval (CLIR) system, a user query would generate a search over documents in a different language and the relevant results would be presented in the user’s language. In practice, CLIR systems are typically evaluated by judging result relevance in the document language, to factor out the effects of translating the results using machine translation (MT). In this paper, we investigate the influence of four different approaches for integrating MT and CLIR on both retrieval accuracy and user judgment of relevancy. We create a corpus with relevance judgments for both human and machine translated results, and use it to quantify the effect that MT quality has on end-to-end relevance. We find that MT errors result in a 16-39% decrease in mean average precision over the ground truth system that uses human translations. MT errors also caused relevant sentences to appear irrelevant – 5-19% of sentences were relevant in human translation, but were judged irrelevant in MT. To counter this degradation, we present two hybrid retrieval models and two automatic MT post-editing techniques and show that these approaches substantially mitigate the errors and improve the end-to-end relevance.</abstract>
<identifier type="citekey">parton-etal-2012-lost</identifier>
<location>
<url>https://aclanthology.org/2012.amta-papers.12</url>
</location>
<part>
<date encoding="iso8601">2012-10-28/2012-11-01</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lost & Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval
%A Parton, Kristen
%A Habash, Nizar
%A McKeown, Kathleen
%S Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2012
%8 oct 28-nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F parton-etal-2012-lost
%X In an ideal cross-lingual information retrieval (CLIR) system, a user query would generate a search over documents in a different language and the relevant results would be presented in the user’s language. In practice, CLIR systems are typically evaluated by judging result relevance in the document language, to factor out the effects of translating the results using machine translation (MT). In this paper, we investigate the influence of four different approaches for integrating MT and CLIR on both retrieval accuracy and user judgment of relevancy. We create a corpus with relevance judgments for both human and machine translated results, and use it to quantify the effect that MT quality has on end-to-end relevance. We find that MT errors result in a 16-39% decrease in mean average precision over the ground truth system that uses human translations. MT errors also caused relevant sentences to appear irrelevant – 5-19% of sentences were relevant in human translation, but were judged irrelevant in MT. To counter this degradation, we present two hybrid retrieval models and two automatic MT post-editing techniques and show that these approaches substantially mitigate the errors and improve the end-to-end relevance.
%U https://aclanthology.org/2012.amta-papers.12
Markdown (Informal)
[Lost & Found in Translation: Impact of Machine Translated Results on Translingual Information Retrieval](https://aclanthology.org/2012.amta-papers.12) (Parton et al., AMTA 2012)
ACL