% Workshop paper (PrivateNLP 2021), as listed in the ACL Anthology.
@inproceedings{kim-etal-2021-using,
  author    = {Kim, Sohyung and
               Bisazza, Arianna and
               Turkmen, Fatih},
  title     = {Using Confidential Data for Domain Adaptation of Neural Machine Translation},
  editor    = {Feyisetan, Oluwaseyi and
               Ghanavati, Sepideh and
               Malmasi, Shervin and
               Thaine, Patricia},
  booktitle = {Proceedings of the Third Workshop on Privacy in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = jun,
  year      = {2021},
  pages     = {46--52},
  doi       = {10.18653/v1/2021.privatenlp-1.6},
  url       = {https://aclanthology.org/2021.privatenlp-1.6},
  abstract  = {We study the problem of domain adaptation in Neural Machine Translation (NMT) when domain-specific data cannot be shared due to confidentiality or copyright issues. As a first step, we propose to fragment data into phrase pairs and use a random sample to fine-tune a generic NMT model instead of the full sentences. Despite the loss of long segments for the sake of confidentiality protection, we find that NMT quality can considerably benefit from this adaptation, and that further gains can be obtained with a simple tagging technique.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey kim-etal-2021-using. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kim-etal-2021-using">
    <titleInfo>
      <title>Using Confidential Data for Domain Adaptation of Neural Machine Translation</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Sohyung</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arianna</namePart>
      <namePart type="family">Bisazza</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fatih</namePart>
      <namePart type="family">Turkmen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and its publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Workshop on Privacy in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Oluwaseyi</namePart>
        <namePart type="family">Feyisetan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sepideh</namePart>
        <namePart type="family">Ghanavati</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shervin</namePart>
        <namePart type="family">Malmasi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Patricia</namePart>
        <namePart type="family">Thaine</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We study the problem of domain adaptation in Neural Machine Translation (NMT) when domain-specific data cannot be shared due to confidentiality or copyright issues. As a first step, we propose to fragment data into phrase pairs and use a random sample to fine-tune a generic NMT model instead of the full sentences. Despite the loss of long segments for the sake of confidentiality protection, we find that NMT quality can considerably benefit from this adaptation, and that further gains can be obtained with a simple tagging technique.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">kim-etal-2021-using</identifier>
    <identifier type="doi">10.18653/v1/2021.privatenlp-1.6</identifier>
    <location>
      <url>https://aclanthology.org/2021.privatenlp-1.6</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>46</start>
        <end>52</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Using Confidential Data for Domain Adaptation of Neural Machine Translation
%A Kim, Sohyung
%A Bisazza, Arianna
%A Turkmen, Fatih
%Y Feyisetan, Oluwaseyi
%Y Ghanavati, Sepideh
%Y Malmasi, Shervin
%Y Thaine, Patricia
%S Proceedings of the Third Workshop on Privacy in Natural Language Processing
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F kim-etal-2021-using
%X We study the problem of domain adaptation in Neural Machine Translation (NMT) when domain-specific data cannot be shared due to confidentiality or copyright issues. As a first step, we propose to fragment data into phrase pairs and use a random sample to fine-tune a generic NMT model instead of the full sentences. Despite the loss of long segments for the sake of confidentiality protection, we find that NMT quality can considerably benefit from this adaptation, and that further gains can be obtained with a simple tagging technique.
%R 10.18653/v1/2021.privatenlp-1.6
%U https://aclanthology.org/2021.privatenlp-1.6
%U https://doi.org/10.18653/v1/2021.privatenlp-1.6
%P 46-52
Markdown (Informal)
[Using Confidential Data for Domain Adaptation of Neural Machine Translation](https://aclanthology.org/2021.privatenlp-1.6) (Kim et al., PrivateNLP 2021)
ACL