@inproceedings{bhardwa-etal-2022-refining,
    title = "Refining an Almost Clean Translation Memory Helps Machine Translation",
    author = "Bhardwa, Shivendra  and
      Alfonso-Hermelo, David  and
      Langlais, Philippe  and
      Bernier-Colborne, Gabriel  and
      Goutte, Cyril  and
      Simard, Michel",
    editor = "Duh, Kevin  and
      Guzm{\'a}n, Francisco",
    booktitle = "Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)",
    month = sep,
    year = "2022",
    address = "Orlando, USA",
    publisher = "Association for Machine Translation in the Americas",
    url = "https://aclanthology.org/2022.amta-research.16",
    pages = "215--226",
    abstract = "While recent studies have been dedicated to cleaning very noisy parallel corpora to improve Machine Translation training, we focus in this work on filtering a large and mostly clean Translation Memory. This problem of practical interest has not received much consideration from the community, in contrast with, for example, filtering large web-mined parallel corpora. We experiment with an extensive, multi-domain proprietary Translation Memory and compare five approaches involving deep-, feature-, and heuristic-based solutions. We propose two ways of evaluating this task, manual annotation and resulting Machine Translation quality. We report significant gains over a state-of-the-art, off-the-shelf cleaning system, using two MT engines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bhardwa-etal-2022-refining">
    <titleInfo>
      <title>Refining an Almost Clean Translation Memory Helps Machine Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Shivendra</namePart>
      <namePart type="family">Bhardwa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Alfonso-Hermelo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Philippe</namePart>
      <namePart type="family">Langlais</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gabriel</namePart>
      <namePart type="family">Bernier-Colborne</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cyril</namePart>
      <namePart type="family">Goutte</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michel</namePart>
      <namePart type="family">Simard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Francisco</namePart>
        <namePart type="family">Guzmán</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">Orlando, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>While recent studies have been dedicated to cleaning very noisy parallel corpora to improve Machine Translation training, we focus in this work on filtering a large and mostly clean Translation Memory. This problem of practical interest has not received much consideration from the community, in contrast with, for example, filtering large web-mined parallel corpora. We experiment with an extensive, multi-domain proprietary Translation Memory and compare five approaches involving deep-, feature-, and heuristic-based solutions. We propose two ways of evaluating this task, manual annotation and resulting Machine Translation quality. We report significant gains over a state-of-the-art, off-the-shelf cleaning system, using two MT engines.</abstract>
    <identifier type="citekey">bhardwa-etal-2022-refining</identifier>
    <location>
      <url>https://aclanthology.org/2022.amta-research.16</url>
    </location>
    <part>
      <date>2022-09</date>
      <extent unit="page">
        <start>215</start>
        <end>226</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Refining an Almost Clean Translation Memory Helps Machine Translation
%A Bhardwa, Shivendra
%A Alfonso-Hermelo, David
%A Langlais, Philippe
%A Bernier-Colborne, Gabriel
%A Goutte, Cyril
%A Simard, Michel
%Y Duh, Kevin
%Y Guzmán, Francisco
%S Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)
%D 2022
%8 September
%I Association for Machine Translation in the Americas
%C Orlando, USA
%F bhardwa-etal-2022-refining
%X While recent studies have been dedicated to cleaning very noisy parallel corpora to improve Machine Translation training, we focus in this work on filtering a large and mostly clean Translation Memory. This problem of practical interest has not received much consideration from the community, in contrast with, for example, filtering large web-mined parallel corpora. We experiment with an extensive, multi-domain proprietary Translation Memory and compare five approaches involving deep-, feature-, and heuristic-based solutions. We propose two ways of evaluating this task, manual annotation and resulting Machine Translation quality. We report significant gains over a state-of-the-art, off-the-shelf cleaning system, using two MT engines.
%U https://aclanthology.org/2022.amta-research.16
%P 215-226
Markdown (Informal)
[Refining an Almost Clean Translation Memory Helps Machine Translation](https://aclanthology.org/2022.amta-research.16) (Bhardwa et al., AMTA 2022)
ACL
Shivendra Bhardwa, David Alfonso-Hermelo, Philippe Langlais, Gabriel Bernier-Colborne, Cyril Goutte, and Michel Simard. 2022. Refining an Almost Clean Translation Memory Helps Machine Translation. In Proceedings of the 15th biennial conference of the Association for Machine Translation in the Americas (Volume 1: Research Track), pages 215–226, Orlando, USA. Association for Machine Translation in the Americas.