@inproceedings{kapociute-dzikiene-etal-2025-localizing,
title = "Localizing {AI:} Evaluating Open-Weight Language Models for Languages of {B}altic States",
author = "Kapo{\v{c}}i{\={u}}t{\.{e}}-Dzikien{\.{e}}, Jurgita and
Bergmanis, Toms and
Pinnis, M{\={a}}rcis",
editor = "Johansson, Richard and
Stymne, Sara",
booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
month = mar,
year = "2025",
address = "Tallinn, Estonia",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2025.nodalida-1.30/",
pages = "287--295",
ISBN = "978-9908-53-109-0",
abstract = "Although large language models (LLMs) have transformed our expectations of modern language technologies, concerns over data privacy often restrict the use of commercially available LLMs hosted outside of EU jurisdictions. This limits their application in governmental, defense, and other data-sensitive sectors. In this work, we evaluate the extent to which locally deployable open-weight large language models support lesser-spoken languages such as Lithuanian, Latvian, and Estonian. We examine various size and precision variants of the top-performing multilingual open-weight models, Llama 3, Gemma 2, Phi, and NeMo, on machine translation, multiple-choice question answering, and free-form text generation. The results indicate that while certain models like Gemma 2 perform close to the top commercially available models, many LLMs struggle with these languages. Most surprisingly, however, we find that these models, while showing close to state-of-the-art translation performance, are still prone to lexical hallucinations with errors in at least 1 in 20 words for all open-weight multilingual LLMs."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kapociute-dzikiene-etal-2025-localizing">
<titleInfo>
<title>Localizing AI: Evaluating Open-Weight Language Models for Languages of Baltic States</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jurgita</namePart>
<namePart type="family">Kapočiūtė-Dzikienė</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Toms</namePart>
<namePart type="family">Bergmanis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mārcis</namePart>
<namePart type="family">Pinnis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>Although large language models (LLMs) have transformed our expectations of modern language technologies, concerns over data privacy often restrict the use of commercially available LLMs hosted outside of EU jurisdictions. This limits their application in governmental, defense, and other data-sensitive sectors. In this work, we evaluate the extent to which locally deployable open-weight large language models support lesser-spoken languages such as Lithuanian, Latvian, and Estonian. We examine various size and precision variants of the top-performing multilingual open-weight models, Llama 3, Gemma 2, Phi, and NeMo, on machine translation, multiple-choice question answering, and free-form text generation. The results indicate that while certain models like Gemma 2 perform close to the top commercially available models, many LLMs struggle with these languages. Most surprisingly, however, we find that these models, while showing close to state-of-the-art translation performance, are still prone to lexical hallucinations with errors in at least 1 in 20 words for all open-weight multilingual LLMs.</abstract>
<identifier type="citekey">kapociute-dzikiene-etal-2025-localizing</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.30/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>287</start>
<end>295</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Localizing AI: Evaluating Open-Weight Language Models for Languages of Baltic States
%A Kapočiūtė-Dzikienė, Jurgita
%A Bergmanis, Toms
%A Pinnis, Mārcis
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F kapociute-dzikiene-etal-2025-localizing
%X Although large language models (LLMs) have transformed our expectations of modern language technologies, concerns over data privacy often restrict the use of commercially available LLMs hosted outside of EU jurisdictions. This limits their application in governmental, defense, and other data-sensitive sectors. In this work, we evaluate the extent to which locally deployable open-weight large language models support lesser-spoken languages such as Lithuanian, Latvian, and Estonian. We examine various size and precision variants of the top-performing multilingual open-weight models, Llama 3, Gemma 2, Phi, and NeMo, on machine translation, multiple-choice question answering, and free-form text generation. The results indicate that while certain models like Gemma 2 perform close to the top commercially available models, many LLMs struggle with these languages. Most surprisingly, however, we find that these models, while showing close to state-of-the-art translation performance, are still prone to lexical hallucinations with errors in at least 1 in 20 words for all open-weight multilingual LLMs.
%U https://aclanthology.org/2025.nodalida-1.30/
%P 287-295
Markdown (Informal)
[Localizing AI: Evaluating Open-Weight Language Models for Languages of Baltic States](https://aclanthology.org/2025.nodalida-1.30/) (Kapočiūtė-Dzikienė et al., NoDaLiDa 2025)
ACL