@article{kunchukuttan-etal-2018-leveraging,
title = "Leveraging Orthographic Similarity for Multilingual Neural Transliteration",
author = "Kunchukuttan, Anoop and
Khapra, Mitesh and
Singh, Gurneet and
Bhattacharyya, Pushpak",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina and
Roark, Brian",
journal = "Transactions of the Association for Computational Linguistics",
volume = "6",
year = "2018",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q18-1022",
doi = "10.1162/tacl_a_00022",
pages = "303--316",
abstract = "We address the task of joint training of transliteration models for multiple language pairs (multilingual transliteration). This is an instance of multitask learning, where individual tasks (language pairs) benefit from sharing knowledge with related tasks. We focus on transliteration involving related tasks i.e., languages sharing writing systems and phonetic properties (orthographically similar languages). We propose a modified neural encoder-decoder model that maximizes parameter sharing across language pairs in order to effectively leverage orthographic similarity. We show that multilingual transliteration significantly outperforms bilingual transliteration in different scenarios (average increase of 58{\%} across a variety of languages we experimented with). We also show that multilingual transliteration models can generalize well to languages/language pairs not encountered during training and hence perform well on the zeroshot transliteration task. We show that further improvements can be achieved by using phonetic feature input.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kunchukuttan-etal-2018-leveraging">
<titleInfo>
<title>Leveraging Orthographic Similarity for Multilingual Neural Transliteration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kunchukuttan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mitesh</namePart>
<namePart type="family">Khapra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gurneet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We address the task of joint training of transliteration models for multiple language pairs (multilingual transliteration). This is an instance of multitask learning, where individual tasks (language pairs) benefit from sharing knowledge with related tasks. We focus on transliteration involving related tasks i.e., languages sharing writing systems and phonetic properties (orthographically similar languages). We propose a modified neural encoder-decoder model that maximizes parameter sharing across language pairs in order to effectively leverage orthographic similarity. We show that multilingual transliteration significantly outperforms bilingual transliteration in different scenarios (average increase of 58% across a variety of languages we experimented with). We also show that multilingual transliteration models can generalize well to languages/language pairs not encountered during training and hence perform well on the zeroshot transliteration task. We show that further improvements can be achieved by using phonetic feature input.</abstract>
<identifier type="citekey">kunchukuttan-etal-2018-leveraging</identifier>
<identifier type="doi">10.1162/tacl_a_00022</identifier>
<location>
<url>https://aclanthology.org/Q18-1022</url>
</location>
<part>
<date>2018</date>
<detail type="volume"><number>6</number></detail>
<extent unit="page">
<start>303</start>
<end>316</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Leveraging Orthographic Similarity for Multilingual Neural Transliteration
%A Kunchukuttan, Anoop
%A Khapra, Mitesh
%A Singh, Gurneet
%A Bhattacharyya, Pushpak
%J Transactions of the Association for Computational Linguistics
%D 2018
%V 6
%I MIT Press
%C Cambridge, MA
%F kunchukuttan-etal-2018-leveraging
%X We address the task of joint training of transliteration models for multiple language pairs (multilingual transliteration). This is an instance of multitask learning, where individual tasks (language pairs) benefit from sharing knowledge with related tasks. We focus on transliteration involving related tasks i.e., languages sharing writing systems and phonetic properties (orthographically similar languages). We propose a modified neural encoder-decoder model that maximizes parameter sharing across language pairs in order to effectively leverage orthographic similarity. We show that multilingual transliteration significantly outperforms bilingual transliteration in different scenarios (average increase of 58% across a variety of languages we experimented with). We also show that multilingual transliteration models can generalize well to languages/language pairs not encountered during training and hence perform well on the zeroshot transliteration task. We show that further improvements can be achieved by using phonetic feature input.
%R 10.1162/tacl_a_00022
%U https://aclanthology.org/Q18-1022
%U https://doi.org/10.1162/tacl_a_00022
%P 303-316
Markdown (Informal)
[Leveraging Orthographic Similarity for Multilingual Neural Transliteration](https://aclanthology.org/Q18-1022) (Kunchukuttan et al., TACL 2018)
ACL