@inproceedings{keita-etal-2024-feriji,
title = "Feriji: A {F}rench-{Z}arma Parallel Corpus, Glossary {\&} Translator",
author = "Keita, Mamadou and
Ibrahim, Elysabhete and
Alfari, Habibatou and
Homan, Christopher",
editor = "Fu, Xiyan and
Fleisig, Eve",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.acl-srw.1/",
doi = "10.18653/v1/2024.acl-srw.1",
pages = "1--9",
abstract = "Machine translation (MT) is a rapidly expanding field that has experienced significant advancements in recent years with the development of models capable of translating multiple languages with remarkable accuracy. However, the representation of African languages in this field still needs improvement due to linguistic complexities and limited resources. This applies to the Zarma language, a dialect of Songhay (of the Nilo-Saharan language family) spoken by over 5 million people across Niger and neighboring countries (Lewis et al., 2016). This paper introduces Feriji, the first robust French-Zarma parallel corpus and glossary designed for MT. The corpus, containing 61,085 sentences in Zarma and 42,789 in French, and a glossary of 4,062 words represents a significant step in addressing the need for more resources for Zarma. We fine-tune three large language models on our dataset, obtaining a BLEU score of 30.06 on the best-performing model. We further evaluate the models on human judgments of fluency, comprehension, and readability and the importance and impact of the corpus and models. Our contributions help to bridge a significant language gap and promote an essential and overlooked indigenous African language."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="keita-etal-2024-feriji">
<titleInfo>
<title>Feriji: A French-Zarma Parallel Corpus, Glossary &amp; Translator</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mamadou</namePart>
<namePart type="family">Keita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elysabhete</namePart>
<namePart type="family">Ibrahim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Habibatou</namePart>
<namePart type="family">Alfari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Homan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiyan</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eve</namePart>
<namePart type="family">Fleisig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation (MT) is a rapidly expanding field that has experienced significant advancements in recent years with the development of models capable of translating multiple languages with remarkable accuracy. However, the representation of African languages in this field still needs improvement due to linguistic complexities and limited resources. This applies to the Zarma language, a dialect of Songhay (of the Nilo-Saharan language family) spoken by over 5 million people across Niger and neighboring countries (Lewis et al., 2016). This paper introduces Feriji, the first robust French-Zarma parallel corpus and glossary designed for MT. The corpus, containing 61,085 sentences in Zarma and 42,789 in French, and a glossary of 4,062 words represents a significant step in addressing the need for more resources for Zarma. We fine-tune three large language models on our dataset, obtaining a BLEU score of 30.06 on the best-performing model. We further evaluate the models on human judgments of fluency, comprehension, and readability and the importance and impact of the corpus and models. Our contributions help to bridge a significant language gap and promote an essential and overlooked indigenous African language.</abstract>
<identifier type="citekey">keita-etal-2024-feriji</identifier>
<identifier type="doi">10.18653/v1/2024.acl-srw.1</identifier>
<location>
<url>https://aclanthology.org/2024.acl-srw.1/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Feriji: A French-Zarma Parallel Corpus, Glossary & Translator
%A Keita, Mamadou
%A Ibrahim, Elysabhete
%A Alfari, Habibatou
%A Homan, Christopher
%Y Fu, Xiyan
%Y Fleisig, Eve
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F keita-etal-2024-feriji
%X Machine translation (MT) is a rapidly expanding field that has experienced significant advancements in recent years with the development of models capable of translating multiple languages with remarkable accuracy. However, the representation of African languages in this field still needs improvement due to linguistic complexities and limited resources. This applies to the Zarma language, a dialect of Songhay (of the Nilo-Saharan language family) spoken by over 5 million people across Niger and neighboring countries (Lewis et al., 2016). This paper introduces Feriji, the first robust French-Zarma parallel corpus and glossary designed for MT. The corpus, containing 61,085 sentences in Zarma and 42,789 in French, and a glossary of 4,062 words represents a significant step in addressing the need for more resources for Zarma. We fine-tune three large language models on our dataset, obtaining a BLEU score of 30.06 on the best-performing model. We further evaluate the models on human judgments of fluency, comprehension, and readability and the importance and impact of the corpus and models. Our contributions help to bridge a significant language gap and promote an essential and overlooked indigenous African language.
%R 10.18653/v1/2024.acl-srw.1
%U https://aclanthology.org/2024.acl-srw.1/
%U https://doi.org/10.18653/v1/2024.acl-srw.1
%P 1-9
Markdown (Informal)
[Feriji: A French-Zarma Parallel Corpus, Glossary & Translator](https://aclanthology.org/2024.acl-srw.1/) (Keita et al., ACL 2024)
ACL
- Mamadou Keita, Elysabhete Ibrahim, Habibatou Alfari, and Christopher Homan. 2024. Feriji: A French-Zarma Parallel Corpus, Glossary & Translator. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 1–9, Bangkok, Thailand. Association for Computational Linguistics.