% ACL Anthology BibTeX record for the workshop paper with citation key assem-2026-whose.
% Case-sensitive words in title/booktitle are brace-protected as whole words so that
% sentence-casing bibliography styles leave them untouched.
@inproceedings{assem-2026-whose,
  title     = {Whose Pragmatics? Cultural Grounding as a Bottleneck for Stereotype Detection in {Egyptian} {Arabic} Social Media},
  author    = {Assem, Samar A.},
  editor    = {Ma, Weicheng and
               Vosoughi, Soroush and
               Gillani, Nabeel and
               Coto-Solano, Rolando},
  booktitle = {Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies ({StereACuLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.stereacult-1.7/},
  pages     = {69--78},
  isbn      = {979-8-89176-408-8},
  abstract  = {Stereotype detection benchmarks assume that stereotyping occurs through what is said {---} via lexical co-occurrence between demographic terms and stereotypical attributes. We argue that stereotyping is often conveyed by what is meant: through presupposition, implicature, and speech-act framing that leave surface content unchanged while embedding prejudice in the pragmatic layer. We call this phenomenon pragmatic stereotyping. Evaluating GPT-4 and Claude 3.5 Sonnet on a stratified sample of 500 Egyptian Arabic social media comments annotated with a seven-tag sentiment/(im)politeness taxonomy, we find that cultural grounding is the critical bottleneck in detecting pragmatic stereotyping in non-English discourse. About 35{\%} of LLM errors result from cultural grounding gaps, leading to a 15-percentage-point F1 difference between explicit tags (0.81) and implicit tags (0.66). These failures are bidirectional: on the author side, LLMs under-detect prejudice encoded through concessive presupposition and backhanded compliments; on the model side, LLMs apply English-based pragmatic assumptions, misinterpreting genuine polite criticism as sarcasm and positive-intended impoliteness as conflictive. Our five-layer Chain-of-Thought diagnostic framework localizes these failures to the culture-dependent inference layers. These results extend stereotype evaluation beyond lexical benchmarks and have direct implications for content moderation pipelines serving Arabic-speaking communities.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citation key assem-2026-whose -->
  <mods ID="assem-2026-whose">
    <titleInfo>
      <title>Whose Pragmatics? Cultural Grounding as a Bottleneck for Stereotype Detection in Egyptian Arabic Social Media</title>
    </titleInfo>
    <!-- Paper author -->
    <name type="personal">
      <namePart type="given">Samar</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Assem</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Weicheng</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Soroush</namePart>
        <namePart type="family">Vosoughi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nabeel</namePart>
        <namePart type="family">Gillani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rolando</namePart>
        <namePart type="family">Coto-Solano</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-408-8</identifier>
    </relatedItem>
    <abstract>Stereotype detection benchmarks assume that stereotyping occurs through what is said — via lexical co-occurrence between demographic terms and stereotypical attributes. We argue that stereotyping is often conveyed by what is meant: through presupposition, implicature, and speech-act framing that leave surface content unchanged while embedding prejudice in the pragmatic layer. We call this phenomenon pragmatic stereotyping. Evaluating GPT-4 and Claude 3.5 Sonnet on a stratified sample of 500 Egyptian Arabic social media comments annotated with a seven-tag sentiment/(im)politeness taxonomy, we find that cultural grounding is the critical bottleneck in detecting pragmatic stereotyping in non-English discourse. About 35% of LLM errors result from cultural grounding gaps, leading to a 15-percentage-point F1 difference between explicit tags (0.81) and implicit tags (0.66). These failures are bidirectional: on the author side, LLMs under-detect prejudice encoded through concessive presupposition and backhanded compliments; on the model side, LLMs apply English-based pragmatic assumptions, misinterpreting genuine polite criticism as sarcasm and positive-intended impoliteness as conflictive. Our five-layer Chain-of-Thought diagnostic framework localizes these failures to the culture-dependent inference layers. These results extend stereotype evaluation beyond lexical benchmarks and have direct implications for content moderation pipelines serving Arabic-speaking communities.</abstract>
    <identifier type="citekey">assem-2026-whose</identifier>
    <location>
      <url>https://aclanthology.org/2026.stereacult-1.7/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>69</start>
        <end>78</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Whose Pragmatics? Cultural Grounding as a Bottleneck for Stereotype Detection in Egyptian Arabic Social Media
%A Assem, Samar A.
%Y Ma, Weicheng
%Y Vosoughi, Soroush
%Y Gillani, Nabeel
%Y Coto-Solano, Rolando
%S Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-408-8
%F assem-2026-whose
%X Stereotype detection benchmarks assume that stereotyping occurs through what is said — via lexical co-occurrence between demographic terms and stereotypical attributes. We argue that stereotyping is often conveyed by what is meant: through presupposition, implicature, and speech-act framing that leave surface content unchanged while embedding prejudice in the pragmatic layer. We call this phenomenon pragmatic stereotyping. Evaluating GPT-4 and Claude 3.5 Sonnet on a stratified sample of 500 Egyptian Arabic social media comments annotated with a seven-tag sentiment/(im)politeness taxonomy, we find that cultural grounding is the critical bottleneck in detecting pragmatic stereotyping in non-English discourse. About 35% of LLM errors result from cultural grounding gaps, leading to a 15-percentage-point F1 difference between explicit tags (0.81) and implicit tags (0.66). These failures are bidirectional: on the author side, LLMs under-detect prejudice encoded through concessive presupposition and backhanded compliments; on the model side, LLMs apply English-based pragmatic assumptions, misinterpreting genuine polite criticism as sarcasm and positive-intended impoliteness as conflictive. Our five-layer Chain-of-Thought diagnostic framework localizes these failures to the culture-dependent inference layers. These results extend stereotype evaluation beyond lexical benchmarks and have direct implications for content moderation pipelines serving Arabic-speaking communities.
%U https://aclanthology.org/2026.stereacult-1.7/
%P 69-78
Markdown (Informal)
[Whose Pragmatics? Cultural Grounding as a Bottleneck for Stereotype Detection in Egyptian Arabic Social Media](https://aclanthology.org/2026.stereacult-1.7/) (Assem, StereACuLT 2026)
ACL