@inproceedings{castilho-etal-2021-dela,
title = "{DELA} Corpus - A Document-Level Corpus Annotated with Context-Related Issues",
author = "Castilho, Sheila and
Cavalheiro Camargo, Jo{\~a}o Lucas and
Menezes, Miguel and
Way, Andy",
booktitle = "Proceedings of the Sixth Conference on Machine Translation",
month = nov,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wmt-1.63",
pages = "566--577",
abstract = "Recently, the Machine Translation (MT) community has become more interested in document-level evaluation especially in light of reactions to claims of {``}human parity{''}, since examining the quality at the level of the document rather than at the sentence level allows for the assessment of suprasentential context, providing a more reliable evaluation. This paper presents a document-level corpus annotated in English with context-aware issues that arise when translating from English into Brazilian Portuguese, namely ellipsis, gender, lexical ambiguity, number, reference, and terminology, with six different domains. The corpus can be used as a challenge test set for evaluation and as a training/testing corpus for MT as well as for deep linguistic analysis of context issues. To the best of our knowledge, this is the first corpus of its kind.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="castilho-etal-2021-dela">
<titleInfo>
<title>DELA Corpus - A Document-Level Corpus Annotated with Context-Related Issues</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sheila</namePart>
<namePart type="family">Castilho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="given">Lucas</namePart>
<namePart type="family">Cavalheiro Camargo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="family">Menezes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Way</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, the Machine Translation (MT) community has become more interested in document-level evaluation especially in light of reactions to claims of “human parity”, since examining the quality at the level of the document rather than at the sentence level allows for the assessment of suprasentential context, providing a more reliable evaluation. This paper presents a document-level corpus annotated in English with context-aware issues that arise when translating from English into Brazilian Portuguese, namely ellipsis, gender, lexical ambiguity, number, reference, and terminology, with six different domains. The corpus can be used as a challenge test set for evaluation and as a training/testing corpus for MT as well as for deep linguistic analysis of context issues. To the best of our knowledge, this is the first corpus of its kind.</abstract>
<identifier type="citekey">castilho-etal-2021-dela</identifier>
<location>
<url>https://aclanthology.org/2021.wmt-1.63</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>566</start>
<end>577</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DELA Corpus - A Document-Level Corpus Annotated with Context-Related Issues
%A Castilho, Sheila
%A Cavalheiro Camargo, João Lucas
%A Menezes, Miguel
%A Way, Andy
%S Proceedings of the Sixth Conference on Machine Translation
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F castilho-etal-2021-dela
%X Recently, the Machine Translation (MT) community has become more interested in document-level evaluation especially in light of reactions to claims of “human parity”, since examining the quality at the level of the document rather than at the sentence level allows for the assessment of suprasentential context, providing a more reliable evaluation. This paper presents a document-level corpus annotated in English with context-aware issues that arise when translating from English into Brazilian Portuguese, namely ellipsis, gender, lexical ambiguity, number, reference, and terminology, with six different domains. The corpus can be used as a challenge test set for evaluation and as a training/testing corpus for MT as well as for deep linguistic analysis of context issues. To the best of our knowledge, this is the first corpus of its kind.
%U https://aclanthology.org/2021.wmt-1.63
%P 566-577
Markdown (Informal)
[DELA Corpus - A Document-Level Corpus Annotated with Context-Related Issues](https://aclanthology.org/2021.wmt-1.63) (Castilho et al., WMT 2021)
ACL