@inproceedings{trivedi-etal-2026-much,
title = "``So, How Much Do {LLM}s Hallucinate on Low-Resource Languages?'' A Quantitative and Qualitative Analysis",
author = "Trivedi, Kushal and
Shaikh, Murtuza and
Sharma, Sriyansh",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Plum, Alistair and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the Second Workshop on Language Models for Low-Resource Languages ({L}o{R}es{LM} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.loreslm-1.24/",
pages = "271--287",
ISBN = "979-8-89176-377-7",
abstract = "Language models have recently gained significant attention in natural language processing, showing strong performance across a wide range of tasks such as text classification, text generation, language modeling, and question answering (QA). Despite these advances, one of the most critical challenges faced by language models is hallucination {---} the generation of fluent and plausible responses that are factually incorrect or fabricated. This study presents preliminary work on analyzing hallucinations in QA tasks for low-resource languages. We evaluate model performance on the Mpox-Myanmar and SynDARin datasets using three API-accessible models: LLaMA 3.1 70B, LLaMA 3.1 8B, and Gemini 2.5 {---} and two monolingual language models: HyGPT 10B for Armenian and SeaLLM for Burmese. Our work contributes by systematically examining hallucinations through quantitative analysis using Natural Language Inference and Semantic Similarity metrics across different model sizes and prompting strategies, as well as qualitative analysis through human verification. We further investigate whether common assumptions about model behavior hold consistently and provide explanations for the observed patterns."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="trivedi-etal-2026-much">
<titleInfo>
<title>“So, How Much Do LLMs Hallucinate on Low-Resource Languages?” A Quantitative and Qualitative Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kushal</namePart>
<namePart type="family">Trivedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murtuza</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sriyansh</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Models for Low-Resource Languages (LoResLM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alistair</namePart>
<namePart type="family">Plum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-377-7</identifier>
</relatedItem>
<abstract>Language models have recently gained significant attention in natural language processing, showing strong performance across a wide range of tasks such as text classification, text generation, language modeling, and question answering (QA). Despite these advances, one of the most critical challenges faced by language models is hallucination — the generation of fluent and plausible responses that are factually incorrect or fabricated. This study presents preliminary work on analyzing hallucinations in QA tasks for low-resource languages. We evaluate model performance on the Mpox-Myanmar and SynDARin datasets using three API-accessible models: LLaMA 3.1 70B, LLaMA 3.1 8B, and Gemini 2.5 — and two monolingual language models: HyGPT 10B for Armenian and SeaLLM for Burmese. Our work contributes by systematically examining hallucinations through quantitative analysis using Natural Language Inference and Semantic Similarity metrics across different model sizes and prompting strategies, as well as qualitative analysis through human verification. We further investigate whether common assumptions about model behavior hold consistently and provide explanations for the observed patterns.</abstract>
<identifier type="citekey">trivedi-etal-2026-much</identifier>
<location>
<url>https://aclanthology.org/2026.loreslm-1.24/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>271</start>
<end>287</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T “So, How Much Do LLMs Hallucinate on Low-Resource Languages?” A Quantitative and Qualitative Analysis
%A Trivedi, Kushal
%A Shaikh, Murtuza
%A Sharma, Sriyansh
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Plum, Alistair
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the Second Workshop on Language Models for Low-Resource Languages (LoResLM 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-377-7
%F trivedi-etal-2026-much
%X Language models have recently gained significant attention in natural language processing, showing strong performance across a wide range of tasks such as text classification, text generation, language modeling, and question answering (QA). Despite these advances, one of the most critical challenges faced by language models is hallucination — the generation of fluent and plausible responses that are factually incorrect or fabricated. This study presents preliminary work on analyzing hallucinations in QA tasks for low-resource languages. We evaluate model performance on the Mpox-Myanmar and SynDARin datasets using three API-accessible models: LLaMA 3.1 70B, LLaMA 3.1 8B, and Gemini 2.5 — and two monolingual language models: HyGPT 10B for Armenian and SeaLLM for Burmese. Our work contributes by systematically examining hallucinations through quantitative analysis using Natural Language Inference and Semantic Similarity metrics across different model sizes and prompting strategies, as well as qualitative analysis through human verification. We further investigate whether common assumptions about model behavior hold consistently and provide explanations for the observed patterns.
%U https://aclanthology.org/2026.loreslm-1.24/
%P 271-287
Markdown (Informal)
["So, How Much Do LLMs Hallucinate on Low-Resource Languages?" A Quantitative and Qualitative Analysis](https://aclanthology.org/2026.loreslm-1.24/) (Trivedi et al., LoResLM 2026)
ACL