@inproceedings{kristensen-mclachlan-nedergard-2024-new,
title = "A New Benchmark for {K}alaallisut-{D}anish Neural Machine Translation",
author = "Kristensen-Mclachlan, Ross and
Nederg{\aa}rd, Johanne",
editor = "Mager, Manuel and
Ebrahimi, Abteen and
Rijhwani, Shruti and
Oncevay, Arturo and
Chiruzzo, Luis and
Pugh, Robert and
von der Wense, Katharina",
booktitle = "Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.americasnlp-1.7",
doi = "10.18653/v1/2024.americasnlp-1.7",
pages = "50--55",
abstract = "Kalaallisut, also known as (West) Greenlandic, poses a number of unique challenges to contemporary natural language processing (NLP). In particular, the language has historically lacked benchmarking datasets and robust evaluation of specific NLP tasks, such as neural machine translation (NMT). In this paper, we present a new benchmark dataset for Greenlandic to Danish NMT comprising over 1.2m words of Greenlandic and 2.1m words of parallel Danish translations. We provide initial metrics for models trained on this dataset and conclude by suggesting how these findings can be taken forward to other NLP tasks for the Greenlandic language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kristensen-mclachlan-nedergard-2024-new">
<titleInfo>
<title>A New Benchmark for Kalaallisut-Danish Neural Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ross</namePart>
<namePart type="family">Kristensen-Mclachlan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanne</namePart>
<namePart type="family">Nedergård</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Pugh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">von der Wense</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Kalaallisut, also known as (West) Greenlandic, poses a number of unique challenges to contemporary natural language processing (NLP). In particular, the language has historically lacked benchmarking datasets and robust evaluation of specific NLP tasks, such as neural machine translation (NMT). In this paper, we present a new benchmark dataset for Greenlandic to Danish NMT comprising over 1.2m words of Greenlandic and 2.1m words of parallel Danish translations. We provide initial metrics for models trained on this dataset and conclude by suggesting how these findings can be taken forward to other NLP tasks for the Greenlandic language.</abstract>
<identifier type="citekey">kristensen-mclachlan-nedergard-2024-new</identifier>
<identifier type="doi">10.18653/v1/2024.americasnlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2024.americasnlp-1.7</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>50</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A New Benchmark for Kalaallisut-Danish Neural Machine Translation
%A Kristensen-Mclachlan, Ross
%A Nedergård, Johanne
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Rijhwani, Shruti
%Y Oncevay, Arturo
%Y Chiruzzo, Luis
%Y Pugh, Robert
%Y von der Wense, Katharina
%S Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F kristensen-mclachlan-nedergard-2024-new
%X Kalaallisut, also known as (West) Greenlandic, poses a number of unique challenges to contemporary natural language processing (NLP). In particular, the language has historically lacked benchmarking datasets and robust evaluation of specific NLP tasks, such as neural machine translation (NMT). In this paper, we present a new benchmark dataset for Greenlandic to Danish NMT comprising over 1.2m words of Greenlandic and 2.1m words of parallel Danish translations. We provide initial metrics for models trained on this dataset and conclude by suggesting how these findings can be taken forward to other NLP tasks for the Greenlandic language.
%R 10.18653/v1/2024.americasnlp-1.7
%U https://aclanthology.org/2024.americasnlp-1.7
%U https://doi.org/10.18653/v1/2024.americasnlp-1.7
%P 50-55
Markdown (Informal)
[A New Benchmark for Kalaallisut-Danish Neural Machine Translation](https://aclanthology.org/2024.americasnlp-1.7) (Kristensen-Mclachlan & Nedergård, AmericasNLP-WS 2024)
ACL
- Ross Kristensen-Mclachlan and Johanne Nedergård. 2024. A New Benchmark for Kalaallisut-Danish Neural Machine Translation. In Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024), pages 50–55, Mexico City, Mexico. Association for Computational Linguistics.