@inproceedings{attieh-etal-2024-system,
title = "System Description of the {N}ordics{A}lps Submission to the {A}mericas{NLP} 2024 Machine Translation Shared Task",
author = "Attieh, Joseph and
Hopton, Zachary and
Scherrer, Yves and
Samard{\v{z}}i{\'c}, Tanja",
editor = "Mager, Manuel and
Ebrahimi, Abteen and
Rijhwani, Shruti and
Oncevay, Arturo and
Chiruzzo, Luis and
Pugh, Robert and
von der Wense, Katharina",
booktitle = "Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.americasnlp-1.18",
doi = "10.18653/v1/2024.americasnlp-1.18",
pages = "150--158",
abstract = "This paper presents the system description of the NordicsAlps team for the AmericasNLP 2024 Machine Translation Shared Task 1. We investigate the effect of tokenization on translation quality by exploring two different tokenization schemes: byte-level and redundancy-driven tokenization. We submitted three runs per language pair. The redundancy-driven tokenization ranked first among all submissions, scoring the highest average chrF2++, chrF, and BLEU metrics (averaged across all languages). These findings demonstrate the importance of carefully tailoring the tokenization strategies of machine translation systems, particularly in resource-constrained scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="attieh-etal-2024-system">
<titleInfo>
<title>System Description of the NordicsAlps Submission to the AmericasNLP 2024 Machine Translation Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Attieh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zachary</namePart>
<namePart type="family">Hopton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanja</namePart>
<namePart type="family">Samardžić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Pugh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">von der Wense</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the system description of the NordicsAlps team for the AmericasNLP 2024 Machine Translation Shared Task 1. We investigate the effect of tokenization on translation quality by exploring two different tokenization schemes: byte-level and redundancy-driven tokenization. We submitted three runs per language pair. The redundancy-driven tokenization ranked first among all submissions, scoring the highest average chrF2++, chrF, and BLEU metrics (averaged across all languages). These findings demonstrate the importance of carefully tailoring the tokenization strategies of machine translation systems, particularly in resource-constrained scenarios.</abstract>
<identifier type="citekey">attieh-etal-2024-system</identifier>
<identifier type="doi">10.18653/v1/2024.americasnlp-1.18</identifier>
<location>
<url>https://aclanthology.org/2024.americasnlp-1.18</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>150</start>
<end>158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T System Description of the NordicsAlps Submission to the AmericasNLP 2024 Machine Translation Shared Task
%A Attieh, Joseph
%A Hopton, Zachary
%A Scherrer, Yves
%A Samardžić, Tanja
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Rijhwani, Shruti
%Y Oncevay, Arturo
%Y Chiruzzo, Luis
%Y Pugh, Robert
%Y von der Wense, Katharina
%S Proceedings of the 4th Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F attieh-etal-2024-system
%X This paper presents the system description of the NordicsAlps team for the AmericasNLP 2024 Machine Translation Shared Task 1. We investigate the effect of tokenization on translation quality by exploring two different tokenization schemes: byte-level and redundancy-driven tokenization. We submitted three runs per language pair. The redundancy-driven tokenization ranked first among all submissions, scoring the highest average chrF2++, chrF, and BLEU metrics (averaged across all languages). These findings demonstrate the importance of carefully tailoring the tokenization strategies of machine translation systems, particularly in resource-constrained scenarios.
%R 10.18653/v1/2024.americasnlp-1.18
%U https://aclanthology.org/2024.americasnlp-1.18
%U https://doi.org/10.18653/v1/2024.americasnlp-1.18
%P 150-158
Markdown (Informal)
[System Description of the NordicsAlps Submission to the AmericasNLP 2024 Machine Translation Shared Task](https://aclanthology.org/2024.americasnlp-1.18) (Attieh et al., AmericasNLP-WS 2024)
ACL