@inproceedings{etchegoyhen-gete-2020-case,
title = "To Case or not to case: Evaluating Casing Methods for Neural Machine Translation",
author = "Etchegoyhen, Thierry and
Gete, Harritxu",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.463/",
pages = "3752--3760",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "We present a comparative evaluation of casing methods for Neural Machine Translation, to help establish an optimal pre- and post-processing methodology. We trained and compared system variants on data prepared with the main casing methods available, namely translation of raw data without case normalisation, lowercasing with recasing, truecasing, case factors and inline casing. Machine translation models were prepared on WMT 2017 English-German and English-Turkish datasets, for all translation directions, and the evaluation includes reference metric results as well as a targeted analysis of case preservation accuracy. Inline casing, where case information is marked along lowercased words in the training data, proved to be the optimal approach overall in these experiments."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="etchegoyhen-gete-2020-case">
<titleInfo>
<title>To Case or not to case: Evaluating Casing Methods for Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Etchegoyhen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harritxu</namePart>
<namePart type="family">Gete</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We present a comparative evaluation of casing methods for Neural Machine Translation, to help establish an optimal pre- and post-processing methodology. We trained and compared system variants on data prepared with the main casing methods available, namely translation of raw data without case normalisation, lowercasing with recasing, truecasing, case factors and inline casing. Machine translation models were prepared on WMT 2017 English-German and English-Turkish datasets, for all translation directions, and the evaluation includes reference metric results as well as a targeted analysis of case preservation accuracy. Inline casing, where case information is marked along lowercased words in the training data, proved to be the optimal approach overall in these experiments.</abstract>
<identifier type="citekey">etchegoyhen-gete-2020-case</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.463/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>3752</start>
<end>3760</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T To Case or not to case: Evaluating Casing Methods for Neural Machine Translation
%A Etchegoyhen, Thierry
%A Gete, Harritxu
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G eng
%F etchegoyhen-gete-2020-case
%X We present a comparative evaluation of casing methods for Neural Machine Translation, to help establish an optimal pre- and post-processing methodology. We trained and compared system variants on data prepared with the main casing methods available, namely translation of raw data without case normalisation, lowercasing with recasing, truecasing, case factors and inline casing. Machine translation models were prepared on WMT 2017 English-German and English-Turkish datasets, for all translation directions, and the evaluation includes reference metric results as well as a targeted analysis of case preservation accuracy. Inline casing, where case information is marked along lowercased words in the training data, proved to be the optimal approach overall in these experiments.
%U https://aclanthology.org/2020.lrec-1.463/
%P 3752-3760
Markdown (Informal)
[To Case or not to case: Evaluating Casing Methods for Neural Machine Translation](https://aclanthology.org/2020.lrec-1.463/) (Etchegoyhen & Gete, LREC 2020)
ACL