@inproceedings{zheng-etal-2022-improving,
title = "Improving {J}ejueo-{K}orean Translation With Cross-Lingual Pretraining Using {J}apanese and {K}orean",
author = "Zheng, Francis and
Marrese-Taylor, Edison and
Matsuo, Yutaka",
booktitle = "Proceedings of the 9th Workshop on Asian Translation",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.wat-1.3/",
pages = "44--50",
abstract = "Jejueo is a critically endangered language spoken on Jeju Island and is closely related to but mutually unintelligible with Korean. Parallel data between Jejueo and Korean is scarce, and translation between the two languages requires more attention, as current neural machine translation systems typically rely on large amounts of parallel training data. While low-resource machine translation has been shown to benefit from using additional monolingual data during the pretraining process, not as much research has been done on how to select languages other than the source and target languages for use during pretraining. We show that using large amounts of Korean and Japanese data during the pretraining process improves translation by 2.16 BLEU points for translation in the Jejueo {\textrightarrow} Korean direction and 1.34 BLEU points for translation in the Korean {\textrightarrow} Jejueo direction compared to the baseline."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2022-improving">
<titleInfo>
<title>Improving Jejueo-Korean Translation With Cross-Lingual Pretraining Using Japanese and Korean</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edison</namePart>
<namePart type="family">Marrese-Taylor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutaka</namePart>
<namePart type="family">Matsuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Asian Translation</title>
</titleInfo>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Jejueo is a critically endangered language spoken on Jeju Island and is closely related to but mutually unintelligible with Korean. Parallel data between Jejueo and Korean is scarce, and translation between the two languages requires more attention, as current neural machine translation systems typically rely on large amounts of parallel training data. While low-resource machine translation has been shown to benefit from using additional monolingual data during the pretraining process, not as much research has been done on how to select languages other than the source and target languages for use during pretraining. We show that using large amounts of Korean and Japanese data during the pretraining process improves translation by 2.16 BLEU points for translation in the Jejueo → Korean direction and 1.34 BLEU points for translation in the Korean → Jejueo direction compared to the baseline.</abstract>
<identifier type="citekey">zheng-etal-2022-improving</identifier>
<location>
<url>https://aclanthology.org/2022.wat-1.3/</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>44</start>
<end>50</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Jejueo-Korean Translation With Cross-Lingual Pretraining Using Japanese and Korean
%A Zheng, Francis
%A Marrese-Taylor, Edison
%A Matsuo, Yutaka
%S Proceedings of the 9th Workshop on Asian Translation
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Gyeongju, Republic of Korea
%F zheng-etal-2022-improving
%X Jejueo is a critically endangered language spoken on Jeju Island and is closely related to but mutually unintelligible with Korean. Parallel data between Jejueo and Korean is scarce, and translation between the two languages requires more attention, as current neural machine translation systems typically rely on large amounts of parallel training data. While low-resource machine translation has been shown to benefit from using additional monolingual data during the pretraining process, not as much research has been done on how to select languages other than the source and target languages for use during pretraining. We show that using large amounts of Korean and Japanese data during the pretraining process improves translation by 2.16 BLEU points for translation in the Jejueo → Korean direction and 1.34 BLEU points for translation in the Korean → Jejueo direction compared to the baseline.
%U https://aclanthology.org/2022.wat-1.3/
%P 44-50
Markdown (Informal)
[Improving Jejueo-Korean Translation With Cross-Lingual Pretraining Using Japanese and Korean](https://aclanthology.org/2022.wat-1.3/) (Zheng et al., WAT 2022)
ACL
Francis Zheng, Edison Marrese-Taylor, and Yutaka Matsuo. 2022. Improving Jejueo-Korean Translation With Cross-Lingual Pretraining Using Japanese and Korean. In Proceedings of the 9th Workshop on Asian Translation, pages 44–50, Gyeongju, Republic of Korea. International Conference on Computational Linguistics.