% Conference-paper record; landing page: https://aclanthology.org/2026.findings-eacl.291/
% Braces inside title/booktitle protect whole words (acronyms, proper nouns, and
% the capital after "?") from being lowercased by sentence-casing styles.
@inproceedings{choudhary-etal-2026-llms,
  title     = {Do {LLMs} model human linguistic variation? {A} case study in {Hindi-English} Verb code-mixing},
  author    = {Choudhary, Mukund and
               Jindal, Madhur and
               Aeron, Gaurja and
               Choudhury, Monojit},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.291/},
  pages     = {5491--5509},
  isbn      = {979-8-89176-386-9},
  abstract  = {Do large language models (LLMs) model linguistic variation? We investigate this question through Hindi-English (Hinglish) verb code-mixing, where speakers can use either a Hindi verb or an English verb with the light verb karna (`do'). Both forms are grammatical, but speakers show unexplained variation in language choice for the verb. We compare human preferences on controlled code-mixed minimal pairs to LLM perplexities spanning families, sizes, and training language compositions. We find that current LLMs do not reliably classify verb language preferences to match native speaker judgments. We also see that with specific supervision, some models do predict human preference to an extent. We release native speaker acceptability judgments on 30 verb pairs, perplexity ratios for 4,279 verb pairs across 7 models, and experimental materials.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record: the paper (four authors) plus its host proceedings
       volume (three editors, publisher, place, ISBN) under relatedItem. -->
  <mods ID="choudhary-etal-2026-llms">
    <titleInfo>
      <title>Do LLMs model human linguistic variation? A case study in Hindi-English Verb code-mixing</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Mukund</namePart>
      <namePart type="family">Choudhary</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Madhur</namePart>
      <namePart type="family">Jindal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gaurja</namePart>
      <namePart type="family">Aeron</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Monojit</namePart>
      <namePart type="family">Choudhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing this paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-386-9</identifier>
    </relatedItem>
    <abstract>Do large language models (LLMs) model linguistic variation? We investigate this question through Hindi-English (Hinglish) verb code-mixing, where speakers can use either a Hindi verb or an English verb with the light verb karna (‘do’). Both forms are grammatical, but speakers show unexplained variation in language choice for the verb. We compare human preferences on controlled code-mixed minimal pairs to LLM perplexities spanning families, sizes, and training language compositions. We find that current LLMs do not reliably classify verb language preferences to match native speaker judgments. We also see that with specific supervision, some models do predict human preference to an extent. We release native speaker acceptability judgments on 30 verb pairs, perplexity ratios for 4,279 verb pairs across 7 models, and experimental materials.</abstract>
    <identifier type="citekey">choudhary-etal-2026-llms</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-eacl.291/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>5491</start>
        <end>5509</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Do LLMs model human linguistic variation? A case study in Hindi-English Verb code-mixing
%A Choudhary, Mukund
%A Jindal, Madhur
%A Aeron, Gaurja
%A Choudhury, Monojit
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F choudhary-etal-2026-llms
%X Do large language models (LLMs) model linguistic variation? We investigate this question through Hindi-English (Hinglish) verb code-mixing, where speakers can use either a Hindi verb or an English verb with the light verb karna (‘do’). Both forms are grammatical, but speakers show unexplained variation in language choice for the verb. We compare human preferences on controlled code-mixed minimal pairs to LLM perplexities spanning families, sizes, and training language compositions. We find that current LLMs do not reliably classify verb language preferences to match native speaker judgments. We also see that with specific supervision, some models do predict human preference to an extent. We release native speaker acceptability judgments on 30 verb pairs, perplexity ratios for 4,279 verb pairs across 7 models, and experimental materials.
%U https://aclanthology.org/2026.findings-eacl.291/
%P 5491-5509
Markdown (Informal)
[Do LLMs model human linguistic variation? A case study in Hindi-English Verb code-mixing](https://aclanthology.org/2026.findings-eacl.291/) (Choudhary et al., Findings 2026)
ACL