@inproceedings{denkowski-neubig-2017-stronger,
title = "Stronger Baselines for Trustable Results in Neural Machine Translation",
author = "Denkowski, Michael and
Neubig, Graham",
editor = "Luong, Thang and
Birch, Alexandra and
Neubig, Graham and
Finch, Andrew",
booktitle = "Proceedings of the First Workshop on Neural Machine Translation",
month = aug,
year = "2017",
address = "Vancouver",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-3203/",
doi = "10.18653/v1/W17-3203",
pages = "18--27",
abstract = "Interest in neural machine translation has grown rapidly as its effectiveness has been demonstrated across language and data scenarios. New research regularly introduces architectural and algorithmic improvements that lead to significant gains over {\textquotedblleft}vanilla{\textquotedblright} NMT implementations. However, these new techniques are rarely evaluated in the context of previously published techniques, specifically those that are widely used in state-of-the-art production and shared-task systems. As a result, it is often difficult to determine whether improvements from research will carry over to systems deployed for real-world use. In this work, we recommend three specific methods that are relatively easy to implement and result in much stronger experimental systems. Beyond reporting significantly higher BLEU scores, we conduct an in-depth analysis of where improvements originate and what inherent weaknesses of basic NMT models are being addressed. We then compare the relative gains afforded by several other techniques proposed in the literature when starting with vanilla systems versus our stronger baselines, showing that experimental conclusions may change depending on the baseline chosen. This indicates that choosing a strong baseline is crucial for reporting reliable experimental results."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="denkowski-neubig-2017-stronger">
<titleInfo>
<title>Stronger Baselines for Trustable Results in Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Denkowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thang</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Birch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Interest in neural machine translation has grown rapidly as its effectiveness has been demonstrated across language and data scenarios. New research regularly introduces architectural and algorithmic improvements that lead to significant gains over “vanilla” NMT implementations. However, these new techniques are rarely evaluated in the context of previously published techniques, specifically those that are widely used in state-of-the-art production and shared-task systems. As a result, it is often difficult to determine whether improvements from research will carry over to systems deployed for real-world use. In this work, we recommend three specific methods that are relatively easy to implement and result in much stronger experimental systems. Beyond reporting significantly higher BLEU scores, we conduct an in-depth analysis of where improvements originate and what inherent weaknesses of basic NMT models are being addressed. We then compare the relative gains afforded by several other techniques proposed in the literature when starting with vanilla systems versus our stronger baselines, showing that experimental conclusions may change depending on the baseline chosen. This indicates that choosing a strong baseline is crucial for reporting reliable experimental results.</abstract>
<identifier type="citekey">denkowski-neubig-2017-stronger</identifier>
<identifier type="doi">10.18653/v1/W17-3203</identifier>
<location>
<url>https://aclanthology.org/W17-3203/</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>18</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Stronger Baselines for Trustable Results in Neural Machine Translation
%A Denkowski, Michael
%A Neubig, Graham
%Y Luong, Thang
%Y Birch, Alexandra
%Y Neubig, Graham
%Y Finch, Andrew
%S Proceedings of the First Workshop on Neural Machine Translation
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver
%F denkowski-neubig-2017-stronger
%X Interest in neural machine translation has grown rapidly as its effectiveness has been demonstrated across language and data scenarios. New research regularly introduces architectural and algorithmic improvements that lead to significant gains over “vanilla” NMT implementations. However, these new techniques are rarely evaluated in the context of previously published techniques, specifically those that are widely used in state-of-the-art production and shared-task systems. As a result, it is often difficult to determine whether improvements from research will carry over to systems deployed for real-world use. In this work, we recommend three specific methods that are relatively easy to implement and result in much stronger experimental systems. Beyond reporting significantly higher BLEU scores, we conduct an in-depth analysis of where improvements originate and what inherent weaknesses of basic NMT models are being addressed. We then compare the relative gains afforded by several other techniques proposed in the literature when starting with vanilla systems versus our stronger baselines, showing that experimental conclusions may change depending on the baseline chosen. This indicates that choosing a strong baseline is crucial for reporting reliable experimental results.
%R 10.18653/v1/W17-3203
%U https://aclanthology.org/W17-3203/
%U https://doi.org/10.18653/v1/W17-3203
%P 18-27
Markdown (Informal)
[Stronger Baselines for Trustable Results in Neural Machine Translation](https://aclanthology.org/W17-3203/) (Denkowski & Neubig, NGT 2017)
ACL