@inproceedings{lou-etal-2023-cceval,
title = "{CCE}val: A Representative Evaluation Benchmark for the {C}hinese-centric Multilingual Machine Translation",
author = "Lou, Lianzhang and
Yin, Xi and
Xie, Yutao and
Xiang, Yang",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.682",
doi = "10.18653/v1/2023.findings-emnlp.682",
pages = "10176--10184",
abstract = "The Chinese-centric Multilingual Machine Translation (MMT) has gained more importance recently due to increasing demands from international business development and cross-cultural exchanges. However, an important factor that limits the progress of this area is the lack of highly representative and high-quality evaluation benchmarks. To fill this gap, we propose CCEval, an impartial and representative Chinese-centric MMT evaluation dataset. This benchmark dataset consists of 2500 Chinese sentences we meticulously selected and processed, and covers more diverse linguistic features as compared to other MMT evaluation benchmarks. These sentences have been translated into 11 languages of various resource levels by professional translators via a rigorously controlled process pipeline to ensure their high quality. We conduct experiments to demonstrate our sampling methodology{'}s effectiveness in constructing evaluation datasets strongly correlated with human evaluations. The resulting dataset enables better assessments of the Chinese-centric MMT quality. Our CCEval benchmark dataset is available at https://bright.pcl.ac.cn/en/offlineTasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lou-etal-2023-cceval">
<titleInfo>
<title>CCEval: A Representative Evaluation Benchmark for the Chinese-centric Multilingual Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lianzhang</namePart>
<namePart type="family">Lou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xi</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutao</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Xiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Chinese-centric Multilingual Machine Translation (MMT) has gained more importance recently due to increasing demands from international business development and cross-cultural exchanges. However, an important factor that limits the progress of this area is the lack of highly representative and high-quality evaluation benchmarks. To fill this gap, we propose CCEval, an impartial and representative Chinese-centric MMT evaluation dataset. This benchmark dataset consists of 2500 Chinese sentences we meticulously selected and processed, and covers more diverse linguistic features as compared to other MMT evaluation benchmarks. These sentences have been translated into 11 languages of various resource levels by professional translators via a rigorously controlled process pipeline to ensure their high quality. We conduct experiments to demonstrate our sampling methodology’s effectiveness in constructing evaluation datasets strongly correlated with human evaluations. The resulting dataset enables better assessments of the Chinese-centric MMT quality. Our CCEval benchmark dataset is available at https://bright.pcl.ac.cn/en/offlineTasks.</abstract>
<identifier type="citekey">lou-etal-2023-cceval</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.682</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.682</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>10176</start>
<end>10184</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CCEval: A Representative Evaluation Benchmark for the Chinese-centric Multilingual Machine Translation
%A Lou, Lianzhang
%A Yin, Xi
%A Xie, Yutao
%A Xiang, Yang
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F lou-etal-2023-cceval
%X The Chinese-centric Multilingual Machine Translation (MMT) has gained more importance recently due to increasing demands from international business development and cross-cultural exchanges. However, an important factor that limits the progress of this area is the lack of highly representative and high-quality evaluation benchmarks. To fill this gap, we propose CCEval, an impartial and representative Chinese-centric MMT evaluation dataset. This benchmark dataset consists of 2500 Chinese sentences we meticulously selected and processed, and covers more diverse linguistic features as compared to other MMT evaluation benchmarks. These sentences have been translated into 11 languages of various resource levels by professional translators via a rigorously controlled process pipeline to ensure their high quality. We conduct experiments to demonstrate our sampling methodology’s effectiveness in constructing evaluation datasets strongly correlated with human evaluations. The resulting dataset enables better assessments of the Chinese-centric MMT quality. Our CCEval benchmark dataset is available at https://bright.pcl.ac.cn/en/offlineTasks.
%R 10.18653/v1/2023.findings-emnlp.682
%U https://aclanthology.org/2023.findings-emnlp.682
%U https://doi.org/10.18653/v1/2023.findings-emnlp.682
%P 10176-10184
Markdown (Informal)
[CCEval: A Representative Evaluation Benchmark for the Chinese-centric Multilingual Machine Translation](https://aclanthology.org/2023.findings-emnlp.682) (Lou et al., Findings 2023)
ACL