% Tutorial abstract from Machine Translation Summit XIII (Xiamen, 2011); record taken from the ACL Anthology URL below.
@inproceedings{ma-etal-2011-confidence,
    title = "From the Confidence Estimation of Machine Translation to the Integration of {MT} and Translation Memory",
    author = "Ma, Yanjun and
      He, Yifan and
      van Genabith, Josef",
    booktitle = "Proceedings of Machine Translation Summit XIII: Tutorial Abstracts",
    month = sep # " 19",
    year = "2011",
    address = "Xiamen, China",
    url = "https://aclanthology.org/2011.mtsummit-tutorials.2",
    abstract = "In this tutorial, we cover techniques that facilitate the integration of Machine Translation (MT) and Translation Memory (TM), which can help the adoption of MT technology in the localisation industry. The tutorial covers five parts: i) brief introduction of MT and TM systems, ii) MT confidence estimation measures tailored for the TM environment, iii) segment-level MT and TM integration, iv) sub-segment level MT and TM integration, and v) human evaluation of MT and TM integration. We will first briefly describe and compare how translations are generated in MT and TM systems, and suggest possible avenues to combine these two systems. We will also cover current quality / cost estimation measures applied in MT and TM systems, such as the fuzzy-match score in the TM, and the evaluation/confidence metrics used to judge MT outputs. We then move on to introduce the recent developments in the field of MT confidence estimation tailored towards predicting post-editing efforts. We will especially focus on the confidence metrics proposed by Specia et al., which are shown to have high correlation with human preference, as well as post-editing time. For segment-level MT and TM integration, we present translation recommendation and translation re-ranking models, where the integration happens at the 1-best or the N-best level, respectively. Given an input to be translated, MT-TM recommendation compares the output from the MT and the TM systems, and presents the better one to the post-editor. MT-TM re-ranking, on the other hand, combines k-best lists from both systems, and generates a new list according to estimated post-editing effort. We observe high precision of these models in automatic and human evaluations, indicating that they can be integrated into TM environments without the risk of deteriorating the quality of the post-editing candidate. For sub-segment level MT and TM integration, we try to reuse high quality TM chunks to improve the quality of MT systems. We can also predict whether phrase pairs derived from fuzzy matches should be used to constrain the translation of an input segment. Using a series of linguistically-motivated features, our constraints lead both to more consistent translation output, and to improved translation quality, as is measured by automatic evaluation scores. Finally, we present several methodologies that can be used to track post-editing effort, perform human evaluation of MT-TM integration, or help translators to access MT outputs in a TM environment.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for the tutorial abstract ma-etal-2011-confidence. Dates are written as YYYY-MM-DD. -->
<mods ID="ma-etal-2011-confidence">
    <titleInfo>
        <title>From the Confidence Estimation of Machine Translation to the Integration of MT and Translation Memory</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
        <namePart type="given">Yanjun</namePart>
        <namePart type="family">Ma</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yifan</namePart>
        <namePart type="family">He</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Josef</namePart>
        <namePart type="family">van Genabith</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2011-09-19</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this abstract appears in. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of Machine Translation Summit XIII: Tutorial Abstracts</title>
        </titleInfo>
        <originInfo>
            <place>
                <placeTerm type="text">Xiamen, China</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this tutorial, we cover techniques that facilitate the integration of Machine Translation (MT) and Translation Memory (TM), which can help the adoption of MT technology in the localisation industry. The tutorial covers five parts: i) brief introduction of MT and TM systems, ii) MT confidence estimation measures tailored for the TM environment, iii) segment-level MT and TM integration, iv) sub-segment level MT and TM integration, and v) human evaluation of MT and TM integration. We will first briefly describe and compare how translations are generated in MT and TM systems, and suggest possible avenues to combine these two systems. We will also cover current quality / cost estimation measures applied in MT and TM systems, such as the fuzzy-match score in the TM, and the evaluation/confidence metrics used to judge MT outputs. We then move on to introduce the recent developments in the field of MT confidence estimation tailored towards predicting post-editing efforts. We will especially focus on the confidence metrics proposed by Specia et al., which are shown to have high correlation with human preference, as well as post-editing time. For segment-level MT and TM integration, we present translation recommendation and translation re-ranking models, where the integration happens at the 1-best or the N-best level, respectively. Given an input to be translated, MT-TM recommendation compares the output from the MT and the TM systems, and presents the better one to the post-editor. MT-TM re-ranking, on the other hand, combines k-best lists from both systems, and generates a new list according to estimated post-editing effort. We observe high precision of these models in automatic and human evaluations, indicating that they can be integrated into TM environments without the risk of deteriorating the quality of the post-editing candidate. For sub-segment level MT and TM integration, we try to reuse high quality TM chunks to improve the quality of MT systems. We can also predict whether phrase pairs derived from fuzzy matches should be used to constrain the translation of an input segment. Using a series of linguistically-motivated features, our constraints lead both to more consistent translation output, and to improved translation quality, as is measured by automatic evaluation scores. Finally, we present several methodologies that can be used to track post-editing effort, perform human evaluation of MT-TM integration, or help translators to access MT outputs in a TM environment.</abstract>
    <identifier type="citekey">ma-etal-2011-confidence</identifier>
    <location>
        <url>https://aclanthology.org/2011.mtsummit-tutorials.2</url>
    </location>
    <part>
        <date>2011-09-19</date>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From the Confidence Estimation of Machine Translation to the Integration of MT and Translation Memory
%A Ma, Yanjun
%A He, Yifan
%A van Genabith, Josef
%S Proceedings of Machine Translation Summit XIII: Tutorial Abstracts
%D 2011
%8 sep 19
%C Xiamen, China
%F ma-etal-2011-confidence
%X In this tutorial, we cover techniques that facilitate the integration of Machine Translation (MT) and Translation Memory (TM), which can help the adoption of MT technology in the localisation industry. The tutorial covers five parts: i) brief introduction of MT and TM systems, ii) MT confidence estimation measures tailored for the TM environment, iii) segment-level MT and TM integration, iv) sub-segment level MT and TM integration, and v) human evaluation of MT and TM integration. We will first briefly describe and compare how translations are generated in MT and TM systems, and suggest possible avenues to combine these two systems. We will also cover current quality / cost estimation measures applied in MT and TM systems, such as the fuzzy-match score in the TM, and the evaluation/confidence metrics used to judge MT outputs. We then move on to introduce the recent developments in the field of MT confidence estimation tailored towards predicting post-editing efforts. We will especially focus on the confidence metrics proposed by Specia et al., which are shown to have high correlation with human preference, as well as post-editing time. For segment-level MT and TM integration, we present translation recommendation and translation re-ranking models, where the integration happens at the 1-best or the N-best level, respectively. Given an input to be translated, MT-TM recommendation compares the output from the MT and the TM systems, and presents the better one to the post-editor. MT-TM re-ranking, on the other hand, combines k-best lists from both systems, and generates a new list according to estimated post-editing effort. We observe high precision of these models in automatic and human evaluations, indicating that they can be integrated into TM environments without the risk of deteriorating the quality of the post-editing candidate. For sub-segment level MT and TM integration, we try to reuse high quality TM chunks to improve the quality of MT systems. We can also predict whether phrase pairs derived from fuzzy matches should be used to constrain the translation of an input segment. Using a series of linguistically-motivated features, our constraints lead both to more consistent translation output, and to improved translation quality, as is measured by automatic evaluation scores. Finally, we present several methodologies that can be used to track post-editing effort, perform human evaluation of MT-TM integration, or help translators to access MT outputs in a TM environment.
%U https://aclanthology.org/2011.mtsummit-tutorials.2
Markdown (Informal)
[From the Confidence Estimation of Machine Translation to the Integration of MT and Translation Memory](https://aclanthology.org/2011.mtsummit-tutorials.2) (Ma et al., MTSummit 2011)
ACL