% BibTeX record for a paper in the C3NLP 2026 workshop proceedings.
% Case-sensitive words in title/booktitle are brace-protected as whole words
% so sentence-casing styles leave them intact.
@inproceedings{bharati-etal-2026-american,
  title     = {The {American} Palimpsest: Quantifying {South} {Asian} {English} Dialect Erasure in {LLMs}},
  author    = {Bharati, Soumedhik and
               Mandal, Shibam and
               Ghosh, Swarup Kr and
               Mondal, Sayani},
  editor    = {Prabhakaran, Vinodkumar and
               Dev, Sunipa and
               Benotti, Luciana and
               Hershcovich, Daniel and
               Cao, Yong and
               Zhou, Li and
               Ma, Bolei and
               Adebara, Ife},
  booktitle = {Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.c3nlp-1.8/},
  pages     = {108--118},
  isbn      = {979-8-89176-420-0},
  abstract  = {Large Language Models are increasingly deployed as writing assistants for users in the Global South, yet rewriting prompts can suppress institutionalized postcolonial varieties. We quantify South Asian English (SAsE) dialect erasure in a state-of-the-art open-weight model using a 500-sentence diagnostic benchmark (320 lexical and 180 syntactic markers). On Llama 3.3 70B, standard grammar correction retains only 26.0{\%} of markers (lexical 31.2{\%}; syntactic 16.7{\%}), while formalization is more destructive (14.0{\%} overall retention). For lexical items, we observe Americanization in 56.2{\%} (correction) and 59.4{\%} (formalization) of cases, typically via Standard American paraphrases. A simple dialect-aware prompt raises retention to 92.0{\%} and reduces lexical Americanization to 6.2{\%}, although some function-word phenomena remain resistant. A stress test shows even stronger suppression (6.7{\%} retention). We position dialect erasure within representational-harm and cultural-competence frameworks, and provide a replicable protocol for auditing writing-assistance systems.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey bharati-etal-2026-american: the paper's own
     metadata, followed by the host proceedings volume in <relatedItem>. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bharati-etal-2026-american">
    <titleInfo>
      <title>The American Palimpsest: Quantifying South Asian English Dialect Erasure in LLMs</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Soumedhik</namePart>
      <namePart type="family">Bharati</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shibam</namePart>
      <namePart type="family">Mandal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Swarup</namePart>
      <namePart type="given">Kr</namePart>
      <namePart type="family">Ghosh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sayani</namePart>
      <namePart type="family">Mondal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vinodkumar</namePart>
        <namePart type="family">Prabhakaran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sunipa</namePart>
        <namePart type="family">Dev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Luciana</namePart>
        <namePart type="family">Benotti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Hershcovich</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yong</namePart>
        <namePart type="family">Cao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Li</namePart>
        <namePart type="family">Zhou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bolei</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ife</namePart>
        <namePart type="family">Adebara</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-420-0</identifier>
    </relatedItem>
    <abstract>Large Language Models are increasingly deployed as writing assistants for users in the Global South, yet rewriting prompts can suppress institutionalized postcolonial varieties. We quantify South Asian English (SAsE) dialect erasure in a state-of-the-art open-weight model using a 500-sentence diagnostic benchmark (320 lexical and 180 syntactic markers). On Llama 3.3 70B, standard grammar correction retains only 26.0% of markers (lexical 31.2%; syntactic 16.7%), while formalization is more destructive (14.0% overall retention). For lexical items, we observe Americanization in 56.2% (correction) and 59.4% (formalization) of cases, typically via Standard American paraphrases. A simple dialect-aware prompt raises retention to 92.0% and reduces lexical Americanization to 6.2%, although some function-word phenomena remain resistant. A stress test shows even stronger suppression (6.7% retention). We position dialect erasure within representational-harm and cultural-competence frameworks, and provide a replicable protocol for auditing writing-assistance systems.</abstract>
    <identifier type="citekey">bharati-etal-2026-american</identifier>
    <location>
      <url>https://aclanthology.org/2026.c3nlp-1.8/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>108</start>
        <end>118</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The American Palimpsest: Quantifying South Asian English Dialect Erasure in LLMs
%A Bharati, Soumedhik
%A Mandal, Shibam
%A Ghosh, Swarup Kr
%A Mondal, Sayani
%Y Prabhakaran, Vinodkumar
%Y Dev, Sunipa
%Y Benotti, Luciana
%Y Hershcovich, Daniel
%Y Cao, Yong
%Y Zhou, Li
%Y Ma, Bolei
%Y Adebara, Ife
%S Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-420-0
%F bharati-etal-2026-american
%X Large Language Models are increasingly deployed as writing assistants for users in the Global South, yet rewriting prompts can suppress institutionalized postcolonial varieties. We quantify South Asian English (SAsE) dialect erasure in a state-of-the-art open-weight model using a 500-sentence diagnostic benchmark (320 lexical and 180 syntactic markers). On Llama 3.3 70B, standard grammar correction retains only 26.0% of markers (lexical 31.2%; syntactic 16.7%), while formalization is more destructive (14.0% overall retention). For lexical items, we observe Americanization in 56.2% (correction) and 59.4% (formalization) of cases, typically via Standard American paraphrases. A simple dialect-aware prompt raises retention to 92.0% and reduces lexical Americanization to 6.2%, although some function-word phenomena remain resistant. A stress test shows even stronger suppression (6.7% retention). We position dialect erasure within representational-harm and cultural-competence frameworks, and provide a replicable protocol for auditing writing-assistance systems.
%U https://aclanthology.org/2026.c3nlp-1.8/
%P 108-118
Markdown (Informal)
[The American Palimpsest: Quantifying South Asian English Dialect Erasure in LLMs](https://aclanthology.org/2026.c3nlp-1.8/) (Bharati et al., C3NLP 2026)
ACL