@inproceedings{cai-xiong-2020-test,
title = "A Test Suite for Evaluating Discourse Phenomena in Document-level Neural Machine Translation",
author = "Cai, Xinyi and
Xiong, Deyi",
editor = "Liu, Qun and
Xiong, Deyi and
Ge, Shili and
Zhang, Xiaojun",
booktitle = "Proceedings of the Second International Workshop of Discourse Processing",
month = dec,
year = "2020",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.iwdp-1.3",
pages = "13--17",
abstract = "The need to evaluate the ability of context-aware neural machine translation (NMT) models in dealing with specific discourse phenomena arises in document-level NMT. However, test sets that satisfy this need are rare. In this paper, we propose a test suite to evaluate three common discourse phenomena in English-Chinese translation: pronoun, discourse connective and ellipsis where discourse divergences lie across the two languages. The test suite contains 1,200 instances, 400 for each type of discourse phenomena. We perform both automatic and human evaluation with three state-of-the-art context-aware NMT models on the proposed test suite. Results suggest that our test suite can be used as a challenging benchmark test bed for evaluating document-level NMT. The test suite will be publicly available soon.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cai-xiong-2020-test">
<titleInfo>
<title>A Test Suite for Evaluating Discourse Phenomena in Document-level Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinyi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deyi</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second International Workshop of Discourse Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qun</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deyi</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shili</namePart>
<namePart type="family">Ge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The need to evaluate the ability of context-aware neural machine translation (NMT) models in dealing with specific discourse phenomena arises in document-level NMT. However, test sets that satisfy this need are rare. In this paper, we propose a test suite to evaluate three common discourse phenomena in English-Chinese translation: pronoun, discourse connective and ellipsis where discourse divergences lie across the two languages. The test suite contains 1,200 instances, 400 for each type of discourse phenomena. We perform both automatic and human evaluation with three state-of-the-art context-aware NMT models on the proposed test suite. Results suggest that our test suite can be used as a challenging benchmark test bed for evaluating document-level NMT. The test suite will be publicly available soon.</abstract>
<identifier type="citekey">cai-xiong-2020-test</identifier>
<location>
<url>https://aclanthology.org/2020.iwdp-1.3</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>13</start>
<end>17</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Test Suite for Evaluating Discourse Phenomena in Document-level Neural Machine Translation
%A Cai, Xinyi
%A Xiong, Deyi
%Y Liu, Qun
%Y Xiong, Deyi
%Y Ge, Shili
%Y Zhang, Xiaojun
%S Proceedings of the Second International Workshop of Discourse Processing
%D 2020
%8 December
%I Association for Computational Linguistics
%C Suzhou, China
%F cai-xiong-2020-test
%X The need to evaluate the ability of context-aware neural machine translation (NMT) models in dealing with specific discourse phenomena arises in document-level NMT. However, test sets that satisfy this need are rare. In this paper, we propose a test suite to evaluate three common discourse phenomena in English-Chinese translation: pronoun, discourse connective and ellipsis where discourse divergences lie across the two languages. The test suite contains 1,200 instances, 400 for each type of discourse phenomena. We perform both automatic and human evaluation with three state-of-the-art context-aware NMT models on the proposed test suite. Results suggest that our test suite can be used as a challenging benchmark test bed for evaluating document-level NMT. The test suite will be publicly available soon.
%U https://aclanthology.org/2020.iwdp-1.3
%P 13-17
Markdown (Informal)
[A Test Suite for Evaluating Discourse Phenomena in Document-level Neural Machine Translation](https://aclanthology.org/2020.iwdp-1.3) (Cai & Xiong, iwdp 2020)
ACL