BibTeX
@inproceedings{kim-kim-2025-dual,
title = "A Dual-Layered Evaluation of Geopolitical and Cultural Bias in {LLM}s",
author = "Kim, Sean and
Kim, Hyuhng Joon",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.38/",
doi = "10.18653/v1/2025.acl-srw.38",
pages = "580--595",
ISBN = "979-8-89176-254-1",
abstract = "As large language models (LLMs) are increasingly deployed across diverse linguistic and cultural contexts, understanding their behavior in both factual and disputable scenarios is essential{---}especially when their outputs may shape public opinion or reinforce dominant narratives. In this paper, we define two types of bias in LLMs: model bias (bias stemming from model training) and inference bias (bias induced by the language of the query), through a two-phase evaluation.Phase 1 evaluates LLMs on factual questions where a single verifiable answer exists, assessing whether models maintain consistency across different query languages. Phase 2 expands the scope by probing geopolitically sensitive disputes, where responses may reflect culturally embedded or ideologically aligned perspectives. We construct a manually curated dataset spanning both factual and disputable QA, across four languages and question types. The results show that Phase 1 exhibits query language-induced alignment, while Phase 2 reflects an interplay between the model{'}s training context and query language. This paper offers a structured framework for evaluating LLM behavior across neutral and sensitive topics, providing insights for future LLM deployment and culturally-aware evaluation practices in multilingual contexts.WARNING: this paper covers East Asian issues which may be politically sensitive."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kim-kim-2025-dual">
    <titleInfo>
      <title>A Dual-Layered Evaluation of Geopolitical and Cultural Bias in LLMs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sean</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hyuhng</namePart>
      <namePart type="given">Joon</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jin</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mingyang</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhu</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-254-1</identifier>
    </relatedItem>
    <abstract>As large language models (LLMs) are increasingly deployed across diverse linguistic and cultural contexts, understanding their behavior in both factual and disputable scenarios is essential—especially when their outputs may shape public opinion or reinforce dominant narratives. In this paper, we define two types of bias in LLMs: model bias (bias stemming from model training) and inference bias (bias induced by the language of the query), through a two-phase evaluation. Phase 1 evaluates LLMs on factual questions where a single verifiable answer exists, assessing whether models maintain consistency across different query languages. Phase 2 expands the scope by probing geopolitically sensitive disputes, where responses may reflect culturally embedded or ideologically aligned perspectives. We construct a manually curated dataset spanning both factual and disputable QA, across four languages and question types. The results show that Phase 1 exhibits query language-induced alignment, while Phase 2 reflects an interplay between the model’s training context and query language. This paper offers a structured framework for evaluating LLM behavior across neutral and sensitive topics, providing insights for future LLM deployment and culturally-aware evaluation practices in multilingual contexts. WARNING: this paper covers East Asian issues which may be politically sensitive.</abstract>
<identifier type="citekey">kim-kim-2025-dual</identifier>
<identifier type="doi">10.18653/v1/2025.acl-srw.38</identifier>
<location>
<url>https://aclanthology.org/2025.acl-srw.38/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>580</start>
<end>595</end>
</extent>
</part>
</mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T A Dual-Layered Evaluation of Geopolitical and Cultural Bias in LLMs
%A Kim, Sean
%A Kim, Hyuhng Joon
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F kim-kim-2025-dual
%X As large language models (LLMs) are increasingly deployed across diverse linguistic and cultural contexts, understanding their behavior in both factual and disputable scenarios is essential—especially when their outputs may shape public opinion or reinforce dominant narratives. In this paper, we define two types of bias in LLMs: model bias (bias stemming from model training) and inference bias (bias induced by the language of the query), through a two-phase evaluation. Phase 1 evaluates LLMs on factual questions where a single verifiable answer exists, assessing whether models maintain consistency across different query languages. Phase 2 expands the scope by probing geopolitically sensitive disputes, where responses may reflect culturally embedded or ideologically aligned perspectives. We construct a manually curated dataset spanning both factual and disputable QA, across four languages and question types. The results show that Phase 1 exhibits query language-induced alignment, while Phase 2 reflects an interplay between the model’s training context and query language. This paper offers a structured framework for evaluating LLM behavior across neutral and sensitive topics, providing insights for future LLM deployment and culturally-aware evaluation practices in multilingual contexts. WARNING: this paper covers East Asian issues which may be politically sensitive.
%R 10.18653/v1/2025.acl-srw.38
%U https://aclanthology.org/2025.acl-srw.38/
%U https://doi.org/10.18653/v1/2025.acl-srw.38
%P 580-595

Markdown (Informal)
[A Dual-Layered Evaluation of Geopolitical and Cultural Bias in LLMs](https://aclanthology.org/2025.acl-srw.38/) (Kim & Kim, ACL 2025)

ACL
Sean Kim and Hyuhng Joon Kim. 2025. A Dual-Layered Evaluation of Geopolitical and Cultural Bias in LLMs. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 580–595, Vienna, Austria. Association for Computational Linguistics.