@inproceedings{igarashi-miyagawa-2024-enhancing,
title = "Enhancing Neural Machine Translation for {A}inu-{J}apanese: A Comprehensive Study on the Impact of Domain and Dialect Integration",
author = "Igarashi, Ryo and
Miyagawa, So",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Miyagawa, So and
Alnajjar, Khalid and
Bizzoni, Yuri},
booktitle = "Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities",
month = nov,
year = "2024",
address = "Miami, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4dh-1.40",
pages = "413--422",
abstract = "Neural Machine Translation (NMT) has revolutionized language translation, yet significant challenges persist for low-resource languages, particularly those with high dialectal variation and limited standardization. This comprehensive study focuses on the Ainu language, a critically endangered indigenous language of northern Japan, which epitomizes these challenges. We address the limitations of previous research through two primary strategies: (1) extensive corpus expansion encompassing diverse domains and dialects, and (2) development of innovative methods to incorporate dialect and domain information directly into the translation process. Our approach yielded substantial improvements in translation quality, with BLEU scores increasing from 32.90 to 39.06 (+6.16) for Japanese → Ainu and from 10.45 to 31.83 (+21.38) for Ainu → Japanese. Through rigorous experimentation and analysis, we demonstrate the crucial importance of integrating linguistic variation information in NMT systems for languages characterized by high diversity and limited resources. Our findings have broad implications for improving machine translation for other low-resource languages, potentially advancing preservation and revitalization efforts for endangered languages worldwide.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="igarashi-miyagawa-2024-enhancing">
<titleInfo>
<title>Enhancing Neural Machine Translation for Ainu-Japanese: A Comprehensive Study on the Impact of Domain and Dialect Integration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryo</namePart>
<namePart type="family">Igarashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural Machine Translation (NMT) has revolutionized language translation, yet significant challenges persist for low-resource languages, particularly those with high dialectal variation and limited standardization. This comprehensive study focuses on the Ainu language, a critically endangered indigenous language of northern Japan, which epitomizes these challenges. We address the limitations of previous research through two primary strategies: (1) extensive corpus expansion encompassing diverse domains and dialects, and (2) development of innovative methods to incorporate dialect and domain information directly into the translation process. Our approach yielded substantial improvements in translation quality, with BLEU scores increasing from 32.90 to 39.06 (+6.16) for Japanese → Ainu and from 10.45 to 31.83 (+21.38) for Ainu → Japanese. Through rigorous experimentation and analysis, we demonstrate the crucial importance of integrating linguistic variation information in NMT systems for languages characterized by high diversity and limited resources. Our findings have broad implications for improving machine translation for other low-resource languages, potentially advancing preservation and revitalization efforts for endangered languages worldwide.</abstract>
<identifier type="citekey">igarashi-miyagawa-2024-enhancing</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.40</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>413</start>
<end>422</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhancing Neural Machine Translation for Ainu-Japanese: A Comprehensive Study on the Impact of Domain and Dialect Integration
%A Igarashi, Ryo
%A Miyagawa, So
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F igarashi-miyagawa-2024-enhancing
%X Neural Machine Translation (NMT) has revolutionized language translation, yet significant challenges persist for low-resource languages, particularly those with high dialectal variation and limited standardization. This comprehensive study focuses on the Ainu language, a critically endangered indigenous language of northern Japan, which epitomizes these challenges. We address the limitations of previous research through two primary strategies: (1) extensive corpus expansion encompassing diverse domains and dialects, and (2) development of innovative methods to incorporate dialect and domain information directly into the translation process. Our approach yielded substantial improvements in translation quality, with BLEU scores increasing from 32.90 to 39.06 (+6.16) for Japanese → Ainu and from 10.45 to 31.83 (+21.38) for Ainu → Japanese. Through rigorous experimentation and analysis, we demonstrate the crucial importance of integrating linguistic variation information in NMT systems for languages characterized by high diversity and limited resources. Our findings have broad implications for improving machine translation for other low-resource languages, potentially advancing preservation and revitalization efforts for endangered languages worldwide.
%U https://aclanthology.org/2024.nlp4dh-1.40
%P 413-422
Markdown (Informal)
[Enhancing Neural Machine Translation for Ainu-Japanese: A Comprehensive Study on the Impact of Domain and Dialect Integration](https://aclanthology.org/2024.nlp4dh-1.40) (Igarashi & Miyagawa, NLP4DH 2024)
ACL