@inproceedings{tonja-etal-2024-ethiollm,
title = "{E}thio{LLM}: Multilingual Large Language Models for {E}thiopian Languages with Task Evaluation",
author = "Tonja, Atnafu Lambebo and
Azime, Israel Abebe and
Belay, Tadesse Destaw and
Yigezu, Mesay Gemeda and
Mehamed, Moges Ahmed Ah and
Ayele, Abinew Ali and
Jibril, Ebrahim Chekol and
Woldeyohannis, Michael Melese and
Kolesnikova, Olga and
Slusallek, Philipp and
Klakow, Dietrich and
Yimam, Seid Muhie",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.561",
pages = "6341--6352",
abstract = "Large language models (LLMs) have gained popularity recently due to their outstanding performance in various downstream Natural Language Processing (NLP) tasks. However, low-resource languages are still lagging behind current state-of-the-art (SOTA) developments in the field of NLP due to insufficient resources to train LLMs. Ethiopian languages exhibit remarkable linguistic diversity, encompassing a wide array of scripts, and are imbued with profound religious and cultural significance. This paper introduces EthioLLM {--} multilingual large language models for five Ethiopian languages (Amharic, Ge{'}ez, Afan Oromo, Somali, and Tigrinya) and English, and Ethiobenchmark {--} a new benchmark dataset for various downstream NLP tasks. We evaluate the performance of these models across five downstream NLP tasks. We open-source our multilingual language models, new benchmark datasets for various downstream tasks, and task-specific fine-tuned language models and discuss the performance of the models. Our dataset and models are available at the https://huggingface.co/EthioNLP repository.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tonja-etal-2024-ethiollm">
<titleInfo>
<title>EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atnafu</namePart>
<namePart type="given">Lambebo</namePart>
<namePart type="family">Tonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Israel</namePart>
<namePart type="given">Abebe</namePart>
<namePart type="family">Azime</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tadesse</namePart>
<namePart type="given">Destaw</namePart>
<namePart type="family">Belay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mesay</namePart>
<namePart type="given">Gemeda</namePart>
<namePart type="family">Yigezu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moges</namePart>
<namePart type="given">Ahmed</namePart>
<namePart type="given">Ah</namePart>
<namePart type="family">Mehamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abinew</namePart>
<namePart type="given">Ali</namePart>
<namePart type="family">Ayele</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ebrahim</namePart>
<namePart type="given">Chekol</namePart>
<namePart type="family">Jibril</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="given">Melese</namePart>
<namePart type="family">Woldeyohannis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Kolesnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Slusallek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="given">Muhie</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models (LLMs) have gained popularity recently due to their outstanding performance in various downstream Natural Language Processing (NLP) tasks. However, low-resource languages are still lagging behind current state-of-the-art (SOTA) developments in the field of NLP due to insufficient resources to train LLMs. Ethiopian languages exhibit remarkable linguistic diversity, encompassing a wide array of scripts, and are imbued with profound religious and cultural significance. This paper introduces EthioLLM – multilingual large language models for five Ethiopian languages (Amharic, Ge’ez, Afan Oromo, Somali, and Tigrinya) and English, and Ethiobenchmark – a new benchmark dataset for various downstream NLP tasks. We evaluate the performance of these models across five downstream NLP tasks. We open-source our multilingual language models, new benchmark datasets for various downstream tasks, and task-specific fine-tuned language models and discuss the performance of the models. Our dataset and models are available at the https://huggingface.co/EthioNLP repository.</abstract>
<identifier type="citekey">tonja-etal-2024-ethiollm</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.561</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>6341</start>
<end>6352</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation
%A Tonja, Atnafu Lambebo
%A Azime, Israel Abebe
%A Belay, Tadesse Destaw
%A Yigezu, Mesay Gemeda
%A Mehamed, Moges Ahmed Ah
%A Ayele, Abinew Ali
%A Jibril, Ebrahim Chekol
%A Woldeyohannis, Michael Melese
%A Kolesnikova, Olga
%A Slusallek, Philipp
%A Klakow, Dietrich
%A Yimam, Seid Muhie
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F tonja-etal-2024-ethiollm
%X Large language models (LLMs) have gained popularity recently due to their outstanding performance in various downstream Natural Language Processing (NLP) tasks. However, low-resource languages are still lagging behind current state-of-the-art (SOTA) developments in the field of NLP due to insufficient resources to train LLMs. Ethiopian languages exhibit remarkable linguistic diversity, encompassing a wide array of scripts, and are imbued with profound religious and cultural significance. This paper introduces EthioLLM – multilingual large language models for five Ethiopian languages (Amharic, Ge’ez, Afan Oromo, Somali, and Tigrinya) and English, and Ethiobenchmark – a new benchmark dataset for various downstream NLP tasks. We evaluate the performance of these models across five downstream NLP tasks. We open-source our multilingual language models, new benchmark datasets for various downstream tasks, and task-specific fine-tuned language models and discuss the performance of the models. Our dataset and models are available at the https://huggingface.co/EthioNLP repository.
%U https://aclanthology.org/2024.lrec-main.561
%P 6341-6352
Markdown (Informal)
[EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation](https://aclanthology.org/2024.lrec-main.561) (Tonja et al., LREC-COLING 2024)
ACL
- Atnafu Lambebo Tonja, Israel Abebe Azime, Tadesse Destaw Belay, Mesay Gemeda Yigezu, Moges Ahmed Ah Mehamed, Abinew Ali Ayele, Ebrahim Chekol Jibril, Michael Melese Woldeyohannis, Olga Kolesnikova, Philipp Slusallek, Dietrich Klakow, and Seid Muhie Yimam. 2024. EthioLLM: Multilingual Large Language Models for Ethiopian Languages with Task Evaluation. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 6341–6352, Torino, Italia. ELRA and ICCL.