@inproceedings{eo-etal-2021-dealing,
title = "Dealing with the Paradox of Quality Estimation",
author = "Eo, Sugyeong and
Park, Chanjun and
Moon, Hyeonseok and
Seo, Jaehyung and
Lim, Heuiseok",
editor = "Ortega, John and
Ojha, Atul Kr. and
Kann, Katharina and
Liu, Chao-Hong",
booktitle = "Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-loresmt.1",
pages = "1--10",
abstract = "In quality estimation (QE), the quality of translation can be predicted by referencing the source sentence and the machine translation (MT) output without access to the reference sentence. However, there exists a paradox in that constructing a dataset for creating a QE model requires non-trivial human labor and time, and it may even requires additional effort compared to the cost of constructing a parallel corpus. In this study, to address this paradox and utilize the various applications of QE, even in low-resource languages (LRLs), we propose a method for automatically constructing a pseudo-QE dataset without using human labor. We perform a comparative analysis on the pseudo-QE dataset using multilingual pre-trained language models. As we generate the pseudo dataset, we conduct experiments using various external machine translators as test sets to verify the accuracy of the results objectively. Also, the experimental results show that multilingual BART demonstrates the best performance, and we confirm the applicability of QE in LRLs using pseudo-QE dataset construction methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eo-etal-2021-dealing">
<titleInfo>
<title>Dealing with the Paradox of Quality Estimation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sugyeong</namePart>
<namePart type="family">Eo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanjun</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyeonseok</namePart>
<namePart type="family">Moon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaehyung</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heuiseok</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ortega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao-Hong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In quality estimation (QE), the quality of translation can be predicted by referencing the source sentence and the machine translation (MT) output without access to the reference sentence. However, there exists a paradox in that constructing a dataset for creating a QE model requires non-trivial human labor and time, and it may even requires additional effort compared to the cost of constructing a parallel corpus. In this study, to address this paradox and utilize the various applications of QE, even in low-resource languages (LRLs), we propose a method for automatically constructing a pseudo-QE dataset without using human labor. We perform a comparative analysis on the pseudo-QE dataset using multilingual pre-trained language models. As we generate the pseudo dataset, we conduct experiments using various external machine translators as test sets to verify the accuracy of the results objectively. Also, the experimental results show that multilingual BART demonstrates the best performance, and we confirm the applicability of QE in LRLs using pseudo-QE dataset construction methods.</abstract>
<identifier type="citekey">eo-etal-2021-dealing</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-loresmt.1</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dealing with the Paradox of Quality Estimation
%A Eo, Sugyeong
%A Park, Chanjun
%A Moon, Hyeonseok
%A Seo, Jaehyung
%A Lim, Heuiseok
%Y Ortega, John
%Y Ojha, Atul Kr.
%Y Kann, Katharina
%Y Liu, Chao-Hong
%S Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021)
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F eo-etal-2021-dealing
%X In quality estimation (QE), the quality of translation can be predicted by referencing the source sentence and the machine translation (MT) output without access to the reference sentence. However, there exists a paradox in that constructing a dataset for creating a QE model requires non-trivial human labor and time, and it may even requires additional effort compared to the cost of constructing a parallel corpus. In this study, to address this paradox and utilize the various applications of QE, even in low-resource languages (LRLs), we propose a method for automatically constructing a pseudo-QE dataset without using human labor. We perform a comparative analysis on the pseudo-QE dataset using multilingual pre-trained language models. As we generate the pseudo dataset, we conduct experiments using various external machine translators as test sets to verify the accuracy of the results objectively. Also, the experimental results show that multilingual BART demonstrates the best performance, and we confirm the applicability of QE in LRLs using pseudo-QE dataset construction methods.
%U https://aclanthology.org/2021.mtsummit-loresmt.1
%P 1-10
Markdown (Informal)
[Dealing with the Paradox of Quality Estimation](https://aclanthology.org/2021.mtsummit-loresmt.1) (Eo et al., LoResMT 2021)
ACL
- Sugyeong Eo, Chanjun Park, Hyeonseok Moon, Jaehyung Seo, and Heuiseok Lim. 2021. Dealing with the Paradox of Quality Estimation. In Proceedings of the 4th Workshop on Technologies for MT of Low Resource Languages (LoResMT2021), pages 1–10, Virtual. Association for Machine Translation in the Americas.