% WMT 2023 Quality Estimation shared-task system paper (ACL Anthology ID 2023.wmt-1.72).
@inproceedings{li-etal-2023-hw-tsc,
  title     = {{HW}-{TSC} 2023 Submission for the Quality Estimation Shared Task},
  author    = {Li, Yuang and
               Su, Chang and
               Zhu, Ming and
               Piao, Mengyao and
               Lyu, Xinglin and
               Zhang, Min and
               Yang, Hao},
  editor    = {Koehn, Philipp and
               Haddow, Barry and
               Kocmi, Tom and
               Monz, Christof},
  booktitle = {Proceedings of the Eighth Conference on Machine Translation},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.wmt-1.72},
  doi       = {10.18653/v1/2023.wmt-1.72},
  pages     = {835--840},
  abstract  = {Quality estimation (QE) is an essential technique to assess machine translation quality without reference translations. In this paper, we focus on Huawei Translation Services Center{'}s (HW-TSC{'}s) submission to the sentence-level QE shared task, named Ensemble-CrossQE. Our system uses CrossQE, the same model architecture as our last year{'}s submission, which consists of a multilingual base model and a task-specific downstream layer. The input is the concatenation of the source and the translated sentences. To enhance the performance, we finetuned and ensembled multiple base models such as XLM-R, InfoXLM, RemBERT and CometKiwi. Moreover, we introduce a new corruption-based data augmentation method, which generates deletion, substitution and insertion errors in the original translation and uses a reference-based QE model to obtain pseudo scores. Results show that our system achieves impressive performance on sentence-level QE test sets and ranked the first place for three language pairs: English-Hindi, English-Tamil and English-Telugu. In addition, we participated in the error span detection task. The submitted model outperforms the baseline on Chinese-English and Hebrew-English language pairs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper li-etal-2023-hw-tsc; the host relatedItem describes the proceedings volume and its editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-etal-2023-hw-tsc">
    <titleInfo>
      <title>HW-TSC 2023 Submission for the Quality Estimation Shared Task</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yuang</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chang</namePart>
      <namePart type="family">Su</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ming</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mengyao</namePart>
      <namePart type="family">Piao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinglin</namePart>
      <namePart type="family">Lyu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Min</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth Conference on Machine Translation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Philipp</namePart>
        <namePart type="family">Koehn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Barry</namePart>
        <namePart type="family">Haddow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tom</namePart>
        <namePart type="family">Kocmi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christof</namePart>
        <namePart type="family">Monz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Quality estimation (QE) is an essential technique to assess machine translation quality without reference translations. In this paper, we focus on Huawei Translation Services Center’s (HW-TSC’s) submission to the sentence-level QE shared task, named Ensemble-CrossQE. Our system uses CrossQE, the same model architecture as our last year’s submission, which consists of a multilingual base model and a task-specific downstream layer. The input is the concatenation of the source and the translated sentences. To enhance the performance, we finetuned and ensembled multiple base models such as XLM-R, InfoXLM, RemBERT and CometKiwi. Moreover, we introduce a new corruption-based data augmentation method, which generates deletion, substitution and insertion errors in the original translation and uses a reference-based QE model to obtain pseudo scores. Results show that our system achieves impressive performance on sentence-level QE test sets and ranked the first place for three language pairs: English-Hindi, English-Tamil and English-Telugu. In addition, we participated in the error span detection task. The submitted model outperforms the baseline on Chinese-English and Hebrew-English language pairs.</abstract>
    <identifier type="citekey">li-etal-2023-hw-tsc</identifier>
    <identifier type="doi">10.18653/v1/2023.wmt-1.72</identifier>
    <location>
      <url>https://aclanthology.org/2023.wmt-1.72</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>835</start>
        <end>840</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T HW-TSC 2023 Submission for the Quality Estimation Shared Task
%A Li, Yuang
%A Su, Chang
%A Zhu, Ming
%A Piao, Mengyao
%A Lyu, Xinglin
%A Zhang, Min
%A Yang, Hao
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F li-etal-2023-hw-tsc
%X Quality estimation (QE) is an essential technique to assess machine translation quality without reference translations. In this paper, we focus on Huawei Translation Services Center’s (HW-TSC’s) submission to the sentence-level QE shared task, named Ensemble-CrossQE. Our system uses CrossQE, the same model architecture as our last year’s submission, which consists of a multilingual base model and a task-specific downstream layer. The input is the concatenation of the source and the translated sentences. To enhance the performance, we finetuned and ensembled multiple base models such as XLM-R, InfoXLM, RemBERT and CometKiwi. Moreover, we introduce a new corruption-based data augmentation method, which generates deletion, substitution and insertion errors in the original translation and uses a reference-based QE model to obtain pseudo scores. Results show that our system achieves impressive performance on sentence-level QE test sets and ranked the first place for three language pairs: English-Hindi, English-Tamil and English-Telugu. In addition, we participated in the error span detection task. The submitted model outperforms the baseline on Chinese-English and Hebrew-English language pairs.
%R 10.18653/v1/2023.wmt-1.72
%U https://aclanthology.org/2023.wmt-1.72
%U https://doi.org/10.18653/v1/2023.wmt-1.72
%P 835-840
Markdown (Informal)
[HW-TSC 2023 Submission for the Quality Estimation Shared Task](https://aclanthology.org/2023.wmt-1.72) (Li et al., WMT 2023)
ACL