% Conference paper by So Miyagawa; ACL Anthology record 2023.nlp4dh-1.14.
@inproceedings{miyagawa-2023-machine,
    title     = {Machine Translation for Highly Low-Resource Language: A Case Study of {Ainu}, a Critically Endangered Indigenous Language in {Northern} {Japan}},
    author    = {Miyagawa, So},
    editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
                 {\"O}hman, Emily and
                 Pirinen, Flammie and
                 Alnajjar, Khalid and
                 Miyagawa, So and
                 Bizzoni, Yuri and
                 Partanen, Niko and
                 Rueter, Jack},
    booktitle = {Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages},
    month     = dec,
    year      = {2023},
    address   = {Tokyo, Japan},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.nlp4dh-1.14},
    pages     = {120--124},
    abstract  = {This paper explores the potential of Machine Translation (MT) in preserving and revitalizing Ainu, an indigenous language of Japan classified as critically endangered by UNESCO. Through leveraging Marian MT, an open-source Neural Machine Translation framework, this study addresses the challenging linguistic features of Ainu and the limitations of available resources. The research implemented a meticulous methodology involving rigorous preprocessing of data, prudent training of the model, and robust evaluation using the SacreBLEU metric. The findings underscore the system{'}s efficacy, achieving a SacreBLEU score of 32.90 for Japanese to Ainu translation. This promising result highlights the capacity of MT systems to support language preservation and aligns with recent research emphasizing the potential of computational techniques for low-resource languages. The paper concludes by affirming the significant role of MT in the broader context of language preservation, serving as a crucial tool in the fight against language extinction. The study paves the way for future research to harness advanced MT techniques and develop more sophisticated models for endangered languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey miyagawa-2023-machine. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="miyagawa-2023-machine">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>Machine Translation for Highly Low-Resource Language: A Case Study of Ainu, a Critically Endangered Indigenous Language in Northern Japan</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">So</namePart>
      <namePart type="family">Miyagawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mika</namePart>
        <namePart type="family">Hämäläinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emily</namePart>
        <namePart type="family">Öhman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Flammie</namePart>
        <namePart type="family">Pirinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Alnajjar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">So</namePart>
        <namePart type="family">Miyagawa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Niko</namePart>
        <namePart type="family">Partanen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jack</namePart>
        <namePart type="family">Rueter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper explores the potential of Machine Translation (MT) in preserving and revitalizing Ainu, an indigenous language of Japan classified as critically endangered by UNESCO. Through leveraging Marian MT, an open-source Neural Machine Translation framework, this study addresses the challenging linguistic features of Ainu and the limitations of available resources. The research implemented a meticulous methodology involving rigorous preprocessing of data, prudent training of the model, and robust evaluation using the SacreBLEU metric. The findings underscore the system’s efficacy, achieving a SacreBLEU score of 32.90 for Japanese to Ainu translation. This promising result highlights the capacity of MT systems to support language preservation and aligns with recent research emphasizing the potential of computational techniques for low-resource languages. The paper concludes by affirming the significant role of MT in the broader context of language preservation, serving as a crucial tool in the fight against language extinction. The study paves the way for future research to harness advanced MT techniques and develop more sophisticated models for endangered languages.</abstract>
    <identifier type="citekey">miyagawa-2023-machine</identifier>
    <location>
      <url>https://aclanthology.org/2023.nlp4dh-1.14</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>120</start>
        <end>124</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translation for Highly Low-Resource Language: A Case Study of Ainu, a Critically Endangered Indigenous Language in Northern Japan
%A Miyagawa, So
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Pirinen, Flammie
%Y Alnajjar, Khalid
%Y Miyagawa, So
%Y Bizzoni, Yuri
%Y Partanen, Niko
%Y Rueter, Jack
%S Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages
%D 2023
%8 December
%I Association for Computational Linguistics
%C Tokyo, Japan
%F miyagawa-2023-machine
%X This paper explores the potential of Machine Translation (MT) in preserving and revitalizing Ainu, an indigenous language of Japan classified as critically endangered by UNESCO. Through leveraging Marian MT, an open-source Neural Machine Translation framework, this study addresses the challenging linguistic features of Ainu and the limitations of available resources. The research implemented a meticulous methodology involving rigorous preprocessing of data, prudent training of the model, and robust evaluation using the SacreBLEU metric. The findings underscore the system’s efficacy, achieving a SacreBLEU score of 32.90 for Japanese to Ainu translation. This promising result highlights the capacity of MT systems to support language preservation and aligns with recent research emphasizing the potential of computational techniques for low-resource languages. The paper concludes by affirming the significant role of MT in the broader context of language preservation, serving as a crucial tool in the fight against language extinction. The study paves the way for future research to harness advanced MT techniques and develop more sophisticated models for endangered languages.
%U https://aclanthology.org/2023.nlp4dh-1.14
%P 120-124
Markdown (Informal)
[Machine Translation for Highly Low-Resource Language: A Case Study of Ainu, a Critically Endangered Indigenous Language in Northern Japan](https://aclanthology.org/2023.nlp4dh-1.14) (Miyagawa, NLP4DH-IWCLUL 2023)
ACL