@inproceedings{chen-etal-2017-cost,
title = "Cost Weighting for Neural Machine Translation Domain Adaptation",
author = "Chen, Boxing and
Cherry, Colin and
Foster, George and
Larkin, Samuel",
editor = "Luong, Thang and
Birch, Alexandra and
Neubig, Graham and
Finch, Andrew",
booktitle = "Proceedings of the First Workshop on Neural Machine Translation",
month = aug,
year = "2017",
address = "Vancouver",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-3205/",
doi = "10.18653/v1/W17-3205",
pages = "40--46",
abstract = "In this paper, we propose a new domain adaptation technique for neural machine translation called cost weighting, which is appropriate for adaptation scenarios in which a small in-domain data set and a large general-domain data set are available. Cost weighting incorporates a domain classifier into the neural machine translation training algorithm, using features derived from the encoder representation in order to distinguish in-domain from out-of-domain data. Classifier probabilities are used to weight sentences according to their domain similarity when updating the parameters of the neural translation model. We compare cost weighting to two traditional domain adaptation techniques developed for statistical machine translation: data selection and sub-corpus weighting. Experiments on two large-data tasks show that both the traditional techniques and our novel proposal lead to significant gains, with cost weighting outperforming the traditional methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2017-cost">
<titleInfo>
<title>Cost Weighting for Neural Machine Translation Domain Adaptation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Boxing</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Foster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Larkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thang</namePart>
<namePart type="family">Luong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Birch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we propose a new domain adaptation technique for neural machine translation called cost weighting, which is appropriate for adaptation scenarios in which a small in-domain data set and a large general-domain data set are available. Cost weighting incorporates a domain classifier into the neural machine translation training algorithm, using features derived from the encoder representation in order to distinguish in-domain from out-of-domain data. Classifier probabilities are used to weight sentences according to their domain similarity when updating the parameters of the neural translation model. We compare cost weighting to two traditional domain adaptation techniques developed for statistical machine translation: data selection and sub-corpus weighting. Experiments on two large-data tasks show that both the traditional techniques and our novel proposal lead to significant gains, with cost weighting outperforming the traditional methods.</abstract>
<identifier type="citekey">chen-etal-2017-cost</identifier>
<identifier type="doi">10.18653/v1/W17-3205</identifier>
<location>
<url>https://aclanthology.org/W17-3205/</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>40</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cost Weighting for Neural Machine Translation Domain Adaptation
%A Chen, Boxing
%A Cherry, Colin
%A Foster, George
%A Larkin, Samuel
%Y Luong, Thang
%Y Birch, Alexandra
%Y Neubig, Graham
%Y Finch, Andrew
%S Proceedings of the First Workshop on Neural Machine Translation
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver
%F chen-etal-2017-cost
%X In this paper, we propose a new domain adaptation technique for neural machine translation called cost weighting, which is appropriate for adaptation scenarios in which a small in-domain data set and a large general-domain data set are available. Cost weighting incorporates a domain classifier into the neural machine translation training algorithm, using features derived from the encoder representation in order to distinguish in-domain from out-of-domain data. Classifier probabilities are used to weight sentences according to their domain similarity when updating the parameters of the neural translation model. We compare cost weighting to two traditional domain adaptation techniques developed for statistical machine translation: data selection and sub-corpus weighting. Experiments on two large-data tasks show that both the traditional techniques and our novel proposal lead to significant gains, with cost weighting outperforming the traditional methods.
%R 10.18653/v1/W17-3205
%U https://aclanthology.org/W17-3205/
%U https://doi.org/10.18653/v1/W17-3205
%P 40-46
Markdown (Informal)
[Cost Weighting for Neural Machine Translation Domain Adaptation](https://aclanthology.org/W17-3205/) (Chen et al., NGT 2017)
ACL