@article{marie-fujita-2017-phrase,
title = "Phrase Table Induction Using In-Domain Monolingual Data for Domain Adaptation in Statistical Machine Translation",
author = "Marie, Benjamin and
Fujita, Atsushi",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina",
journal = "Transactions of the Association for Computational Linguistics",
volume = "5",
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q17-1034",
doi = "10.1162/tacl_a_00075",
pages = "487--500",
abstract = "We present a new framework to induce an in-domain phrase table from in-domain monolingual data that can be used to adapt a general-domain statistical machine translation system to the targeted domain. Our method first compiles sets of phrases in source and target languages separately and generates candidate phrase pairs by taking the Cartesian product of the two phrase sets. It then computes inexpensive features for each candidate phrase pair and filters them using a supervised classifier in order to induce an in-domain phrase table. We experimented on the language pair English{--}French, both translation directions, in two domains and obtained consistently better results than a strong baseline system that uses an in-domain bilingual lexicon. We also conducted an error analysis that showed the induced phrase tables proposed useful translations, especially for words and phrases unseen in the parallel data used to train the general-domain baseline system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marie-fujita-2017-phrase">
<titleInfo>
<title>Phrase Table Induction Using In-Domain Monolingual Data for Domain Adaptation in Statistical Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Marie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atsushi</namePart>
<namePart type="family">Fujita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We present a new framework to induce an in-domain phrase table from in-domain monolingual data that can be used to adapt a general-domain statistical machine translation system to the targeted domain. Our method first compiles sets of phrases in source and target languages separately and generates candidate phrase pairs by taking the Cartesian product of the two phrase sets. It then computes inexpensive features for each candidate phrase pair and filters them using a supervised classifier in order to induce an in-domain phrase table. We experimented on the language pair English–French, both translation directions, in two domains and obtained consistently better results than a strong baseline system that uses an in-domain bilingual lexicon. We also conducted an error analysis that showed the induced phrase tables proposed useful translations, especially for words and phrases unseen in the parallel data used to train the general-domain baseline system.</abstract>
<identifier type="citekey">marie-fujita-2017-phrase</identifier>
<identifier type="doi">10.1162/tacl_a_00075</identifier>
<location>
<url>https://aclanthology.org/Q17-1034</url>
</location>
<part>
<date>2017</date>
<detail type="volume"><number>5</number></detail>
<extent unit="page">
<start>487</start>
<end>500</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Phrase Table Induction Using In-Domain Monolingual Data for Domain Adaptation in Statistical Machine Translation
%A Marie, Benjamin
%A Fujita, Atsushi
%J Transactions of the Association for Computational Linguistics
%D 2017
%V 5
%I MIT Press
%C Cambridge, MA
%F marie-fujita-2017-phrase
%X We present a new framework to induce an in-domain phrase table from in-domain monolingual data that can be used to adapt a general-domain statistical machine translation system to the targeted domain. Our method first compiles sets of phrases in source and target languages separately and generates candidate phrase pairs by taking the Cartesian product of the two phrase sets. It then computes inexpensive features for each candidate phrase pair and filters them using a supervised classifier in order to induce an in-domain phrase table. We experimented on the language pair English–French, both translation directions, in two domains and obtained consistently better results than a strong baseline system that uses an in-domain bilingual lexicon. We also conducted an error analysis that showed the induced phrase tables proposed useful translations, especially for words and phrases unseen in the parallel data used to train the general-domain baseline system.
%R 10.1162/tacl_a_00075
%U https://aclanthology.org/Q17-1034
%U https://doi.org/10.1162/tacl_a_00075
%P 487-500
Markdown (Informal)
[Phrase Table Induction Using In-Domain Monolingual Data for Domain Adaptation in Statistical Machine Translation](https://aclanthology.org/Q17-1034) (Marie & Fujita, TACL 2017)
ACL