% Conference paper in the *SEM 2026 proceedings, pages 111--124 (record URL: aclanthology.org).
% Acronyms in title/booktitle are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{salari-etal-2026-uncovering,
  title     = {Uncovering Ideological Bias in {RAG} with Lexical Multidimensional Analysis: A Case Study on {COVID}-19},
  author    = {Salari, Elmira and
               Delfino, Maria Claudia Nunes and
               Amamou, Hazem and
               de Souza, Jos{\'e} Victor and
               Kshirsagar, Shruti and
               Davoust, Alan and
               Avila, Anderson},
  editor    = {Mohammad, Saif M. and
               Ousidhoum, Nedjma},
  booktitle = {Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.starsem-conference.7/},
  pages     = {111--124},
  isbn      = {979-8-89176-413-2},
  abstract  = {This paper studies the impact of retrieved ideologically framed texts on the outputs of large language models (LLMs). While interest in understanding ideological framing in LLMs has recently increased, little attention has been given to this issue in the context of Retrieval-Augmented Generation (RAG). To fill this gap, we design an external knowledge source based on ideologically framed texts about COVID-19 treatments. Our corpus is based on 1,117 academic articles representing discourses about controversial and endorsed treatments for the disease. We propose a corpus linguistics framework, based on Lexical Multidimensional Analysis (LMDA), to identify discourse dimensions within the corpus. LLMs are tasked to answer questions derived from three identified discourse dimensions, and two types of contextual prompts are adopted: the first comprises the user question and ideologically framed texts; and the second contains the question, ideologically framed texts, and LMDA descriptions. Alignment between reference ideologically framed texts and LLMs' responses is assessed using cosine similarity for lexical and semantic representations. Results demonstrate that retrieved ideologically framed texts influence LLM responses toward the discourse framing represented in the external knowledge, with enhanced prompts further amplifying this effect. Our findings highlight the importance of identifying ideological framings within the RAG framework in order to mitigate not just unintended ideological bias, but also the risks of intentional discourse steering of such models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 bibliographic record for the conference paper with citekey salari-etal-2026-uncovering. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="salari-etal-2026-uncovering">
<titleInfo>
<title>Uncovering Ideological Bias in RAG with Lexical Multidimensional Analysis: A Case Study on COVID-19</title>
</titleInfo>
<!-- Paper authors, one name element each; multi-part given names are split into separate given nameParts. -->
<name type="personal">
<namePart type="given">Elmira</namePart>
<namePart type="family">Salari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Claudia</namePart>
<namePart type="given">Nunes</namePart>
<namePart type="family">Delfino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hazem</namePart>
<namePart type="family">Amamou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">Victor</namePart>
<namePart type="family">de Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Kshirsagar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Davoust</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anderson</namePart>
<namePart type="family">Avila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume that contains the paper, with its editors, publisher, place and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>This paper studies the impact of retrieved ideologically framed texts on the outputs of large language models (LLMs). While interest in understanding ideological framing in LLMs has recently increased, little attention has been given to this issue in the context of Retrieval-Augmented Generation (RAG). To fill this gap, we design an external knowledge source based on ideologically framed texts about COVID-19 treatments. Our corpus is based on 1,117 academic articles representing discourses about controversial and endorsed treatments for the disease. We propose a corpus linguistics framework, based on Lexical Multidimensional Analysis (LMDA), to identify discourse dimensions within the corpus. LLMs are tasked to answer questions derived from three identified discourse dimensions, and two types of contextual prompts are adopted: the first comprises the user question and ideologically framed texts; and the second contains the question, ideologically framed texts, and LMDA descriptions. Alignment between reference ideologically framed texts and LLMs’ responses is assessed using cosine similarity for lexical and semantic representations. Results demonstrate that retrieved ideologically framed texts influence LLM responses toward the discourse framing represented in the external knowledge, with enhanced prompts further amplifying this effect. Our findings highlight the importance of identifying ideological framings within the RAG framework in order to mitigate not just unintended ideological bias, but also the risks of intentional discourse steering of such models.</abstract>
<!-- The citekey identifier repeats the ID attribute on the mods element. -->
<identifier type="citekey">salari-etal-2026-uncovering</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.7/</url>
</location>
<!-- Part: date and first/last page of the paper. -->
<part>
<date>2026-07</date>
<extent unit="page">
<start>111</start>
<end>124</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uncovering Ideological Bias in RAG with Lexical Multidimensional Analysis: A Case Study on COVID-19
%A Salari, Elmira
%A Delfino, Maria Claudia Nunes
%A Amamou, Hazem
%A de Souza, José Victor
%A Kshirsagar, Shruti
%A Davoust, Alan
%A Avila, Anderson
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F salari-etal-2026-uncovering
%X This paper studies the impact of retrieved ideologically framed texts on the outputs of large language models (LLMs). While interest in understanding ideological framing in LLMs has recently increased, little attention has been given to this issue in the context of Retrieval-Augmented Generation (RAG). To fill this gap, we design an external knowledge source based on ideologically framed texts about COVID-19 treatments. Our corpus is based on 1,117 academic articles representing discourses about controversial and endorsed treatments for the disease. We propose a corpus linguistics framework, based on Lexical Multidimensional Analysis (LMDA), to identify discourse dimensions within the corpus. LLMs are tasked to answer questions derived from three identified discourse dimensions, and two types of contextual prompts are adopted: the first comprises the user question and ideologically framed texts; and the second contains the question, ideologically framed texts, and LMDA descriptions. Alignment between reference ideologically framed texts and LLMs’ responses is assessed using cosine similarity for lexical and semantic representations. Results demonstrate that retrieved ideologically framed texts influence LLM responses toward the discourse framing represented in the external knowledge, with enhanced prompts further amplifying this effect. Our findings highlight the importance of identifying ideological framings within the RAG framework in order to mitigate not just unintended ideological bias, but also the risks of intentional discourse steering of such models.
%U https://aclanthology.org/2026.starsem-conference.7/
%P 111-124
Markdown (Informal)
[Uncovering Ideological Bias in RAG with Lexical Multidimensional Analysis: A Case Study on COVID-19](https://aclanthology.org/2026.starsem-conference.7/) (Salari et al., *SEM 2026)
ACL
- Elmira Salari, Maria Claudia Nunes Delfino, Hazem Amamou, José Victor de Souza, Shruti Kshirsagar, Alan Davoust, and Anderson Avila. 2026. Uncovering Ideological Bias in RAG with Lexical Multidimensional Analysis: A Case Study on COVID-19. In Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026), pages 111–124, San Diego, California, United States. Association for Computational Linguistics.