@inproceedings{kim-etal-2020-zero,
title = "Zero-shot {N}orth {K}orean to {E}nglish Neural Machine Translation by Character Tokenization and Phoneme Decomposition",
author = "Kim, Hwichan and
Hirasawa, Tosho and
Komachi, Mamoru",
editor = "Rijhwani, Shruti and
Liu, Jiangming and
Wang, Yizhong and
Dror, Rotem",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-srw.11",
doi = "10.18653/v1/2020.acl-srw.11",
pages = "72--78",
abstract = "The primary limitation of North Korean to English translation is the lack of a parallel corpus; therefore, high translation accuracy cannot be achieved. To address this problem, we propose a zero-shot approach using South Korean data, which are remarkably similar to North Korean data. We train a neural machine translation model after tokenizing a South Korean text at the character level and decomposing characters into phonemes. We demonstrate that our method can effectively learn North Korean to English translation and improve the BLEU scores by +1.01 points in comparison with the baseline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2020-zero">
<titleInfo>
<title>Zero-shot North Korean to English Neural Machine Translation by Character Tokenization and Phoneme Decomposition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hwichan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tosho</namePart>
<namePart type="family">Hirasawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiangming</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yizhong</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rotem</namePart>
<namePart type="family">Dror</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The primary limitation of North Korean to English translation is the lack of a parallel corpus; therefore, high translation accuracy cannot be achieved. To address this problem, we propose a zero-shot approach using South Korean data, which are remarkably similar to North Korean data. We train a neural machine translation model after tokenizing a South Korean text at the character level and decomposing characters into phonemes. We demonstrate that our method can effectively learn North Korean to English translation and improve the BLEU scores by +1.01 points in comparison with the baseline.</abstract>
<identifier type="citekey">kim-etal-2020-zero</identifier>
<identifier type="doi">10.18653/v1/2020.acl-srw.11</identifier>
<location>
<url>https://aclanthology.org/2020.acl-srw.11</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>72</start>
<end>78</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Zero-shot North Korean to English Neural Machine Translation by Character Tokenization and Phoneme Decomposition
%A Kim, Hwichan
%A Hirasawa, Tosho
%A Komachi, Mamoru
%Y Rijhwani, Shruti
%Y Liu, Jiangming
%Y Wang, Yizhong
%Y Dror, Rotem
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F kim-etal-2020-zero
%X The primary limitation of North Korean to English translation is the lack of a parallel corpus; therefore, high translation accuracy cannot be achieved. To address this problem, we propose a zero-shot approach using South Korean data, which are remarkably similar to North Korean data. We train a neural machine translation model after tokenizing a South Korean text at the character level and decomposing characters into phonemes. We demonstrate that our method can effectively learn North Korean to English translation and improve the BLEU scores by +1.01 points in comparison with the baseline.
%R 10.18653/v1/2020.acl-srw.11
%U https://aclanthology.org/2020.acl-srw.11
%U https://doi.org/10.18653/v1/2020.acl-srw.11
%P 72-78
Markdown (Informal)
[Zero-shot North Korean to English Neural Machine Translation by Character Tokenization and Phoneme Decomposition](https://aclanthology.org/2020.acl-srw.11) (Kim et al., ACL 2020)
ACL