% Workshop paper in the C3NLP 2026 proceedings; ACL Anthology record 2026.c3nlp-1.16.
% Names are stored in "Last, First" form and separated by " and ".
@inproceedings{pandey-etal-2026-beyond,
    title     = {Beyond Monolithic Culture: Evaluating Understandability of Online Text Across Cultural Dimensions},
    author    = {Pandey, Saurabh Kumar and
                 Gupta, Harshit and
                 Saha, Sougata and
                 Choudhury, Monojit},
    editor    = {Prabhakaran, Vinodkumar and
                 Dev, Sunipa and
                 Benotti, Luciana and
                 Hershcovich, Daniel and
                 Cao, Yong and
                 Zhou, Li and
                 Ma, Bolei and
                 Adebara, Ife},
    booktitle = {Proceedings of the 4th Workshop on Cross-Cultural Considerations in {NLP} ({C3NLP} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.c3nlp-1.16/},
    pages     = {204--220},
    isbn      = {979-8-89176-420-0},
    abstract  = {Culture shapes how people interpret language, especially in online reviews containing culture-specific items (CSIs). Yet, most existing evaluations treat culture as a monolithic construct, offering no insight into which cultural dimensions pose difficulty for readers, or how large language models (LLMs), which power AI reading assistants, perform across them. This gap limits our ability to obtain reliable, cross-cultural estimates of model performance. To address this, we analyze CSIs in English Goodreads reviews across Newmark{'}s cultural dimensions (e.g., material, ecology, customs, habits, social) and evaluate six LLMs of varying sizes on their ability to identify CSIs within each dimension. We find that readers struggle most with CSIs from the material, customs, and social dimensions, while models underperform on more localized ones (e.g., habits), revealing systematic cultural blind spots. To support further research on culturally representative benchmarking, we release an expert-annotated dataset of CSIs labeled by cultural dimension. Empirical analysis shows our dataset as more challenging and of higher quality than existing cultural benchmarks, enabling finer-grained evaluation of cultural understanding in models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pandey-etal-2026-beyond">
<titleInfo>
<title>Beyond Monolithic Culture: Evaluating Understandability of Online Text Across Cultural Dimensions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Pandey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harshit</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sougata</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monojit</namePart>
<namePart type="family">Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vinodkumar</namePart>
<namePart type="family">Prabhakaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunipa</namePart>
<namePart type="family">Dev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luciana</namePart>
<namePart type="family">Benotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Hershcovich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yong</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bolei</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ife</namePart>
<namePart type="family">Adebara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-420-0</identifier>
</relatedItem>
<abstract>Culture shapes how people interpret language, especially in online reviews containing culture-specific items (CSIs). Yet, most existing evaluations treat culture as a monolithic construct, offering no insight into which cultural dimensions pose difficulty for readers, or how large language models (LLMs), which power AI reading assistants, perform across them. This gap limits our ability to obtain reliable, cross-cultural estimates of model performance. To address this, we analyze CSIs in English Goodreads reviews across Newmark’s cultural dimensions (e.g., material, ecology, customs, habits, social) and evaluate six LLMs of varying sizes on their ability to identify CSIs within each dimension. We find that readers struggle most with CSIs from the material, customs, and social dimensions, while models underperform on more localized ones (e.g., habits), revealing systematic cultural blind spots. To support further research on culturally representative benchmarking, we release an expert-annotated dataset of CSIs labeled by cultural dimension. Empirical analysis shows our dataset as more challenging and of higher quality than existing cultural benchmarks, enabling finer-grained evaluation of cultural understanding in models.</abstract>
<identifier type="citekey">pandey-etal-2026-beyond</identifier>
<location>
<url>https://aclanthology.org/2026.c3nlp-1.16/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>204</start>
<end>220</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Monolithic Culture: Evaluating Understandability of Online Text Across Cultural Dimensions
%A Pandey, Saurabh Kumar
%A Gupta, Harshit
%A Saha, Sougata
%A Choudhury, Monojit
%Y Prabhakaran, Vinodkumar
%Y Dev, Sunipa
%Y Benotti, Luciana
%Y Hershcovich, Daniel
%Y Cao, Yong
%Y Zhou, Li
%Y Ma, Bolei
%Y Adebara, Ife
%S Proceedings of the 4th Workshop on Cross-Cultural Considerations in NLP (C3NLP 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-420-0
%F pandey-etal-2026-beyond
%X Culture shapes how people interpret language, especially in online reviews containing culture-specific items (CSIs). Yet, most existing evaluations treat culture as a monolithic construct, offering no insight into which cultural dimensions pose difficulty for readers, or how large language models (LLMs), which power AI reading assistants, perform across them. This gap limits our ability to obtain reliable, cross-cultural estimates of model performance. To address this, we analyze CSIs in English Goodreads reviews across Newmark’s cultural dimensions (e.g., material, ecology, customs, habits, social) and evaluate six LLMs of varying sizes on their ability to identify CSIs within each dimension. We find that readers struggle most with CSIs from the material, customs, and social dimensions, while models underperform on more localized ones (e.g., habits), revealing systematic cultural blind spots. To support further research on culturally representative benchmarking, we release an expert-annotated dataset of CSIs labeled by cultural dimension. Empirical analysis shows our dataset as more challenging and of higher quality than existing cultural benchmarks, enabling finer-grained evaluation of cultural understanding in models.
%U https://aclanthology.org/2026.c3nlp-1.16/
%P 204-220
Markdown (Informal)
[Beyond Monolithic Culture: Evaluating Understandability of Online Text Across Cultural Dimensions](https://aclanthology.org/2026.c3nlp-1.16/) (Pandey et al., C3NLP 2026)
ACL