@inproceedings{dargis-etal-2024-evaluating,
  title     = {Evaluating Open-Source {LLMs} in Low-Resource Languages: Insights from {Latvian} High School Exams},
  author    = {Dar{\c{g}}is, Roberts and
               B{\=a}rzdi{\c{n}}{\v{s}}, Guntis and
               Skadi{\c{n}}a, Inguna and
               Saulite, Baiba},
  editor    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               {\"O}hman, Emily and
               Miyagawa, So and
               Alnajjar, Khalid and
               Bizzoni, Yuri},
  booktitle = {Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities},
  month     = nov,
  year      = {2024},
  address   = {Miami, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.nlp4dh-1.28},
  pages     = {289--293},
  abstract  = {The latest large language models (LLM) have significantly advanced natural language processing (NLP) capabilities across various tasks. However, their performance in low-resource languages, such as Latvian with 1.5 million native speakers, remains substantially underexplored due to both limited training data and the absence of comprehensive evaluation benchmarks. This study addresses this gap by conducting a systematic assessment of prominent open-source LLMs on natural language understanding (NLU) and natural language generation (NLG) tasks in Latvian. We utilize standardized high school centralized graduation exams as a benchmark dataset, offering relatable and diverse evaluation scenarios that encompass multiple-choice questions and complex text analysis tasks. Our experimental setup involves testing models from the leading LLM families, including Llama, Qwen, Gemma, and Mistral, with OpenAI{'}s GPT-4 serving as a performance reference. The results reveal that certain open-source models demonstrate competitive performance in NLU tasks, narrowing the gap with GPT-4. However, all models exhibit notable deficiencies in NLG tasks, specifically in generating coherent and contextually appropriate text analyses, highlighting persistent challenges in NLG for low-resource languages. These findings contribute to efforts to develop robust multilingual benchmarks and improve LLM performance in diverse linguistic contexts.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 bibliographic record (Library of Congress Metadata Object Description
     Schema) for citekey dargis-etal-2024-evaluating. Mirrors the BibTeX entry in
     this file; element content and order must be preserved as-is for consumers
     of the ACL Anthology export. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dargis-etal-2024-evaluating">
<titleInfo>
<title>Evaluating Open-Source LLMs in Low-Resource Languages: Insights from Latvian High School Exams</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roberts</namePart>
<namePart type="family">Darģis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guntis</namePart>
<namePart type="family">Bārzdiņš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inguna</namePart>
<namePart type="family">Skadiņa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baiba</namePart>
<namePart type="family">Saulite</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the conference proceedings volume containing this paper -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The latest large language models (LLM) have significantly advanced natural language processing (NLP) capabilities across various tasks. However, their performance in low-resource languages, such as Latvian with 1.5 million native speakers, remains substantially underexplored due to both limited training data and the absence of comprehensive evaluation benchmarks. This study addresses this gap by conducting a systematic assessment of prominent open-source LLMs on natural language understanding (NLU) and natural language generation (NLG) tasks in Latvian. We utilize standardized high school centralized graduation exams as a benchmark dataset, offering relatable and diverse evaluation scenarios that encompass multiple-choice questions and complex text analysis tasks. Our experimental setup involves testing models from the leading LLM families, including Llama, Qwen, Gemma, and Mistral, with OpenAI’s GPT-4 serving as a performance reference. The results reveal that certain open-source models demonstrate competitive performance in NLU tasks, narrowing the gap with GPT-4. However, all models exhibit notable deficiencies in NLG tasks, specifically in generating coherent and contextually appropriate text analyses, highlighting persistent challenges in NLG for low-resource languages. These findings contribute to efforts to develop robust multilingual benchmarks and improve LLM performance in diverse linguistic contexts.</abstract>
<identifier type="citekey">dargis-etal-2024-evaluating</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4dh-1.28</url>
</location>
<!-- part: issue date and page extent within the host proceedings -->
<part>
<date>2024-11</date>
<extent unit="page">
<start>289</start>
<end>293</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Open-Source LLMs in Low-Resource Languages: Insights from Latvian High School Exams
%A Darģis, Roberts
%A Bārzdiņš, Guntis
%A Skadiņa, Inguna
%A Saulite, Baiba
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Miyagawa, So
%Y Alnajjar, Khalid
%Y Bizzoni, Yuri
%S Proceedings of the 4th International Conference on Natural Language Processing for Digital Humanities
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, USA
%F dargis-etal-2024-evaluating
%X The latest large language models (LLM) have significantly advanced natural language processing (NLP) capabilities across various tasks. However, their performance in low-resource languages, such as Latvian with 1.5 million native speakers, remains substantially underexplored due to both limited training data and the absence of comprehensive evaluation benchmarks. This study addresses this gap by conducting a systematic assessment of prominent open-source LLMs on natural language understanding (NLU) and natural language generation (NLG) tasks in Latvian. We utilize standardized high school centralized graduation exams as a benchmark dataset, offering relatable and diverse evaluation scenarios that encompass multiple-choice questions and complex text analysis tasks. Our experimental setup involves testing models from the leading LLM families, including Llama, Qwen, Gemma, and Mistral, with OpenAI’s GPT-4 serving as a performance reference. The results reveal that certain open-source models demonstrate competitive performance in NLU tasks, narrowing the gap with GPT-4. However, all models exhibit notable deficiencies in NLG tasks, specifically in generating coherent and contextually appropriate text analyses, highlighting persistent challenges in NLG for low-resource languages. These findings contribute to efforts to develop robust multilingual benchmarks and improve LLM performance in diverse linguistic contexts.
%U https://aclanthology.org/2024.nlp4dh-1.28
%P 289-293
Markdown (Informal)
[Evaluating Open-Source LLMs in Low-Resource Languages: Insights from Latvian High School Exams](https://aclanthology.org/2024.nlp4dh-1.28) (Darģis et al., NLP4DH 2024)
ACL