@article{naim-etal-2018-feature,
title = "Feature-Based Decipherment for Machine Translation",
author = "Naim, Iftekhar and
Riley, Parker and
Gildea, Daniel",
journal = "Computational Linguistics",
volume = "44",
number = "3",
month = sep,
year = "2018",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J18-3006",
doi = "10.1162/coli_a_00326",
pages = "525--546",
abstract = "Orthographic similarities across languages provide a strong signal for unsupervised probabilistic transduction (decipherment) for closely related language pairs. The existing decipherment models, however, are not well suited for exploiting these orthographic similarities. We propose a log-linear model with latent variables that incorporates orthographic similarity features. Maximum likelihood training is computationally expensive for the proposed log-linear model. To address this challenge, we perform approximate inference via Markov chain Monte Carlo sampling and contrastive divergence. Our results show that the proposed log-linear model with contrastive divergence outperforms the existing generative decipherment models by exploiting the orthographic features. The model both scales to large vocabularies and preserves accuracy in low- and no-resource contexts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="naim-etal-2018-feature">
<titleInfo>
<title>Feature-Based Decipherment for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iftekhar</namePart>
<namePart type="family">Naim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parker</namePart>
<namePart type="family">Riley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Gildea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Orthographic similarities across languages provide a strong signal for unsupervised probabilistic transduction (decipherment) for closely related language pairs. The existing decipherment models, however, are not well suited for exploiting these orthographic similarities. We propose a log-linear model with latent variables that incorporates orthographic similarity features. Maximum likelihood training is computationally expensive for the proposed log-linear model. To address this challenge, we perform approximate inference via Markov chain Monte Carlo sampling and contrastive divergence. Our results show that the proposed log-linear model with contrastive divergence outperforms the existing generative decipherment models by exploiting the orthographic features. The model both scales to large vocabularies and preserves accuracy in low- and no-resource contexts.</abstract>
<identifier type="citekey">naim-etal-2018-feature</identifier>
<identifier type="doi">10.1162/coli_a_00326</identifier>
<location>
<url>https://aclanthology.org/J18-3006</url>
</location>
<part>
<date>2018-09</date>
<detail type="volume"><number>44</number></detail>
<detail type="issue"><number>3</number></detail>
<extent unit="page">
<start>525</start>
<end>546</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Feature-Based Decipherment for Machine Translation
%A Naim, Iftekhar
%A Riley, Parker
%A Gildea, Daniel
%J Computational Linguistics
%D 2018
%8 September
%V 44
%N 3
%I MIT Press
%C Cambridge, MA
%F naim-etal-2018-feature
%X Orthographic similarities across languages provide a strong signal for unsupervised probabilistic transduction (decipherment) for closely related language pairs. The existing decipherment models, however, are not well suited for exploiting these orthographic similarities. We propose a log-linear model with latent variables that incorporates orthographic similarity features. Maximum likelihood training is computationally expensive for the proposed log-linear model. To address this challenge, we perform approximate inference via Markov chain Monte Carlo sampling and contrastive divergence. Our results show that the proposed log-linear model with contrastive divergence outperforms the existing generative decipherment models by exploiting the orthographic features. The model both scales to large vocabularies and preserves accuracy in low- and no-resource contexts.
%R 10.1162/coli_a_00326
%U https://aclanthology.org/J18-3006
%U https://doi.org/10.1162/coli_a_00326
%P 525-546
Markdown (Informal)
[Feature-Based Decipherment for Machine Translation](https://aclanthology.org/J18-3006) (Naim et al., CL 2018)
ACL