% ACL Anthology record for a C3NLP 2026 workshop paper (pp. 67--75).
% Proper nouns and acronyms in the title are brace-protected as whole words
% so that sentence-casing styles keep their capitalisation.
@inproceedings{bonilla-2026-llm,
  title     = {{LLM}-Adapted {Colombian} {Spanish} Lexicography: Proficiency Control, Hallucination, and Cultural Distortion},
  author    = {Bonilla, Johnatan E.},
  editor    = {Prabhakaran, Vinodkumar and
               Dev, Sunipa and
               Benotti, Luciana and
               Hershcovich, Daniel and
               Cao, Yong and
               Zhou, Li and
               Ma, Bolei and
               Adebara, Ife},
  booktitle = {Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.c3nlp-1.5/},
  pages     = {67--75},
  isbn      = {979-8-89176-420-0},
  abstract  = {We evaluate whether open-source LLMs can produce proficiency-graded English adaptations of entries from the \textit{Diccionario de colombianismos} (DiCol), a Colombian Spanish lexicographic resource used in language teaching. Three 7{--}8B instruction-tuned models{---}Llama{~}3.1, Qwen2.5, and Mistral{---}generate Beginner, Intermediate, and Advanced translations for all 8{,}252 definitions using structured zero-shot prompts identical across levels except for the target CEFR band. Automated metrics show that Intermediate targeting collapses (73{--}83{\%} classified as Advanced by vocabulary, $\chi^2 > 705$, $p < .001$) and that Advanced outputs expand 4.9{--}8.2$\times$ relative to the source. Expert annotation of a 360-entry stratified sample ($\kappa = 0.61${--}0.68) identifies hallucination in 19{\%} of entries (Fleiss' $\kappa = 0.77$ for cultural preservation categories, 97{\%} unanimity among flagged cases). Hallucination concentrates in the Advanced condition (81{\%}, $\chi^2 = 86.6$, $p < .001$) and is associated with higher expansion ($U = 16{,}662$, $p < .001$, $r = 0.68$), manifesting primarily as generic elaboration and, in a smaller proportion, as Colombia-stereotyping and pragmatic polarity inversion. We discuss these findings through the lens of (CITATION){'}s domestication framework and describe the observed pattern as \textit{algorithmic domestication}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citekey bonilla-2026-llm: one authored paper hosted in
       a proceedings volume (relatedItem type="host") that lists eight editors. -->
  <mods ID="bonilla-2026-llm">
    <titleInfo>
      <title>LLM-Adapted Colombian Spanish Lexicography: Proficiency Control, Hallucination, and Cultural Distortion</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Johnatan</namePart>
      <namePart type="given">E</namePart>
      <namePart type="family">Bonilla</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vinodkumar</namePart>
        <namePart type="family">Prabhakaran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sunipa</namePart>
        <namePart type="family">Dev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luciana</namePart>
        <namePart type="family">Benotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Hershcovich</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yong</namePart>
        <namePart type="family">Cao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Li</namePart>
        <namePart type="family">Zhou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bolei</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ife</namePart>
        <namePart type="family">Adebara</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-420-0</identifier>
    </relatedItem>
    <!-- Comparison operators in the abstract are entity-escaped so the character data stays well-formed. -->
    <abstract>We evaluate whether open-source LLMs can produce proficiency-graded English adaptations of entries from the Diccionario de colombianismos (DiCol), a Colombian Spanish lexicographic resource used in language teaching. Three 7–8B instruction-tuned models—Llama 3.1, Qwen2.5, and Mistral—generate Beginner, Intermediate, and Advanced translations for all 8,252 definitions using structured zero-shot prompts identical across levels except for the target CEFR band. Automated metrics show that Intermediate targeting collapses (73–83% classified as Advanced by vocabulary, χ² &gt; 705, p &lt; .001) and that Advanced outputs expand 4.9–8.2× relative to the source. Expert annotation of a 360-entry stratified sample (κ = 0.61–0.68) identifies hallucination in 19% of entries (Fleiss’ κ = 0.77 for cultural preservation categories, 97% unanimity among flagged cases). Hallucination concentrates in the Advanced condition (81%, χ² = 86.6, p &lt; .001) and is associated with higher expansion (U = 16,662, p &lt; .001, r = 0.68), manifesting primarily as generic elaboration and, in a smaller proportion, as Colombia-stereotyping and pragmatic polarity inversion. We discuss these findings through the lens of (CITATION)’s domestication framework and describe the observed pattern as algorithmic domestication.</abstract>
    <identifier type="citekey">bonilla-2026-llm</identifier>
    <location>
      <url>https://aclanthology.org/2026.c3nlp-1.5/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>67</start>
        <end>75</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T LLM-Adapted Colombian Spanish Lexicography: Proficiency Control, Hallucination, and Cultural Distortion
%A Bonilla, Johnatan E.
%Y Prabhakaran, Vinodkumar
%Y Dev, Sunipa
%Y Benotti, Luciana
%Y Hershcovich, Daniel
%Y Cao, Yong
%Y Zhou, Li
%Y Ma, Bolei
%Y Adebara, Ife
%S Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-420-0
%F bonilla-2026-llm
%X We evaluate whether open-source LLMs can produce proficiency-graded English adaptations of entries from the Diccionario de colombianismos (DiCol), a Colombian Spanish lexicographic resource used in language teaching. Three 7–8B instruction-tuned models—Llama 3.1, Qwen2.5, and Mistral—generate Beginner, Intermediate, and Advanced translations for all 8,252 definitions using structured zero-shot prompts identical across levels except for the target CEFR band. Automated metrics show that Intermediate targeting collapses (73–83% classified as Advanced by vocabulary, χ² > 705, p < .001) and that Advanced outputs expand 4.9–8.2× relative to the source. Expert annotation of a 360-entry stratified sample (κ = 0.61–0.68) identifies hallucination in 19% of entries (Fleiss’ κ = 0.77 for cultural preservation categories, 97% unanimity among flagged cases). Hallucination concentrates in the Advanced condition (81%, χ² = 86.6, p < .001) and is associated with higher expansion (U = 16,662, p < .001, r = 0.68), manifesting primarily as generic elaboration and, in a smaller proportion, as Colombia-stereotyping and pragmatic polarity inversion. We discuss these findings through the lens of (CITATION)’s domestication framework and describe the observed pattern as algorithmic domestication.
%U https://aclanthology.org/2026.c3nlp-1.5/
%P 67-75
Markdown (Informal)
[LLM-Adapted Colombian Spanish Lexicography: Proficiency Control, Hallucination, and Cultural Distortion](https://aclanthology.org/2026.c3nlp-1.5/) (Bonilla, C3NLP 2026)
ACL