% Record for the conference paper at aclanthology.org/2024.lrec-main.583
% (LREC-COLING 2024 proceedings, pages 6575 to 6581).
% The language names in the title are brace-protected as whole words so
% that sentence-casing bibliography styles keep their capitals.
@inproceedings{lei-etal-2024-evaluation-dataset,
    title = "Evaluation Dataset for Lexical Translation Consistency in {Chinese}-to-{English} Document-level Translation",
    author = "Lei, Xiangyu and
      Li, Junhui and
      Tao, Shimin and
      Yang, Hao",
    editor = "Calzolari, Nicoletta and
      Kan, Min-Yen and
      Hoste, Veronique and
      Lenci, Alessandro and
      Sakti, Sakriani and
      Xue, Nianwen",
    booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
    month = may,
    year = "2024",
    address = "Torino, Italia",
    publisher = "ELRA and ICCL",
    url = "https://aclanthology.org/2024.lrec-main.583",
    pages = "6575--6581",
    abstract = "Lexical translation consistency is one of the most common discourse phenomena in Chinese-to-English document-level translation. To better evaluate the performance of lexical translation consistency, previous researches assumes that all repeated source words should be translated consistently. However, constraining translations of repeated source words to be consistent will hurt word diversity and human translators tend to use different words in translation. Therefore, in this paper we construct a test set of 310 bilingual news articles to properly evaluate lexical translation consistency. We manually differentiate those repeated source words whose translations are consistent into two types: true consistency and false consistency. Then based on the constructed test set, we evaluate the performance of lexical translation consistency for several typical NMT systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding one record, citekey lei-etal-2024-evaluation-dataset. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lei-etal-2024-evaluation-dataset">
    <titleInfo>
      <title>Evaluation Dataset for Lexical Translation Consistency in Chinese-to-English Document-level Translation</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Xiangyu</namePart>
      <namePart type="family">Lei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junhui</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shimin</namePart>
      <namePart type="family">Tao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Lexical translation consistency is one of the most common discourse phenomena in Chinese-to-English document-level translation. To better evaluate the performance of lexical translation consistency, previous researches assumes that all repeated source words should be translated consistently. However, constraining translations of repeated source words to be consistent will hurt word diversity and human translators tend to use different words in translation. Therefore, in this paper we construct a test set of 310 bilingual news articles to properly evaluate lexical translation consistency. We manually differentiate those repeated source words whose translations are consistent into two types: true consistency and false consistency. Then based on the constructed test set, we evaluate the performance of lexical translation consistency for several typical NMT systems.</abstract>
    <identifier type="citekey">lei-etal-2024-evaluation-dataset</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.583</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>6575</start>
        <end>6581</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation Dataset for Lexical Translation Consistency in Chinese-to-English Document-level Translation
%A Lei, Xiangyu
%A Li, Junhui
%A Tao, Shimin
%A Yang, Hao
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F lei-etal-2024-evaluation-dataset
%X Lexical translation consistency is one of the most common discourse phenomena in Chinese-to-English document-level translation. To better evaluate the performance of lexical translation consistency, previous researches assumes that all repeated source words should be translated consistently. However, constraining translations of repeated source words to be consistent will hurt word diversity and human translators tend to use different words in translation. Therefore, in this paper we construct a test set of 310 bilingual news articles to properly evaluate lexical translation consistency. We manually differentiate those repeated source words whose translations are consistent into two types: true consistency and false consistency. Then based on the constructed test set, we evaluate the performance of lexical translation consistency for several typical NMT systems.
%U https://aclanthology.org/2024.lrec-main.583
%P 6575-6581
Markdown (Informal)
[Evaluation Dataset for Lexical Translation Consistency in Chinese-to-English Document-level Translation](https://aclanthology.org/2024.lrec-main.583) (Lei et al., LREC-COLING 2024)
ACL