% BibTeX record for https://aclanthology.org/2026.findings-acl.374/
@inproceedings{huo-etal-2026-breaking,
    title = "Breaking Language Preference in Multilingual {RAG} via Language-Controllable Retrieval and Language-Agnostic Reasoning",
    author = "Huo, Wenshuai and
      Feng, Xiaocheng and
      Li, Baohang and
      Fu, Chengpeng and
      Huang, Yichong and
      Wang, Hui and
      Qin, Bing",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-acl.374/",
    pages = "7579--7589",
    ISBN = "979-8-89176-395-1",
    abstract = "Retrieval-Augmented Generation (RAG) significantly improves the factual accuracy and generation quality of large language models by incorporating external knowledge. However, in multilingual settings, RAG systems suffer from severe language preference. On the one hand, the retrieval stage is sensitive to the query language: semantically equivalent queries expressed in different languages often lead to substantially different retrieval results. On the other hand, when retrieved documents contain knowledge written in multiple languages, large language models tend to be influenced by surface-level language forms, rather than reasoning solely based on semantic relevance to the query. To address these challenges, we propose a unified optimization framework that explicitly disentangles multilingual RAG into language-controllable retrieval and language-agnostic reasoning. Our framework allows LLM to adaptively select retrieval languages while enforcing cross-lingual consistency during reasoning, thereby mitigating language bias without modifying existing retrievers or translators. Experimental results demonstrate that our approach effectively reduces language bias in multilingual RAG and consistently outperforms baselines across multiple multilingual benchmarks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for the paper; the citekey identifier below is huo-etal-2026-breaking. -->
<mods ID="huo-etal-2026-breaking">
<titleInfo>
<title>Breaking Language Preference in Multilingual RAG via Language-Controllable Retrieval and Language-Agnostic Reasoning</title>
</titleInfo>
<!-- Paper authors, one <name> element per person. -->
<name type="personal">
<namePart type="given">Wenshuai</namePart>
<namePart type="family">Huo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaocheng</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baohang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengpeng</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hui</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Retrieval-Augmented Generation (RAG) significantly improves the factual accuracy and generation quality of large language models by incorporating external knowledge. However, in multilingual settings, RAG systems suffer from severe language preference. On the one hand, the retrieval stage is sensitive to the query language: semantically equivalent queries expressed in different languages often lead to substantially different retrieval results. On the other hand, when retrieved documents contain knowledge written in multiple languages, large language models tend to be influenced by surface-level language forms, rather than reasoning solely based on semantic relevance to the query. To address these challenges, we propose a unified optimization framework that explicitly disentangles multilingual RAG into language-controllable retrieval and language-agnostic reasoning. Our framework allows LLM to adaptively select retrieval languages while enforcing cross-lingual consistency during reasoning, thereby mitigating language bias without modifying existing retrievers or translators. Experimental results demonstrate that our approach effectively reduces language bias in multilingual RAG and consistently outperforms baselines across multiple multilingual benchmarks.</abstract>
<identifier type="citekey">huo-etal-2026-breaking</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.374/</url>
</location>
<!-- Page range of the paper within the host volume. -->
<part>
<date>2026-07</date>
<extent unit="page">
<start>7579</start>
<end>7589</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Breaking Language Preference in Multilingual RAG via Language-Controllable Retrieval and Language-Agnostic Reasoning
%A Huo, Wenshuai
%A Feng, Xiaocheng
%A Li, Baohang
%A Fu, Chengpeng
%A Huang, Yichong
%A Wang, Hui
%A Qin, Bing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F huo-etal-2026-breaking
%X Retrieval-Augmented Generation (RAG) significantly improves the factual accuracy and generation quality of large language models by incorporating external knowledge. However, in multilingual settings, RAG systems suffer from severe language preference. On the one hand, the retrieval stage is sensitive to the query language: semantically equivalent queries expressed in different languages often lead to substantially different retrieval results. On the other hand, when retrieved documents contain knowledge written in multiple languages, large language models tend to be influenced by surface-level language forms, rather than reasoning solely based on semantic relevance to the query. To address these challenges, we propose a unified optimization framework that explicitly disentangles multilingual RAG into language-controllable retrieval and language-agnostic reasoning. Our framework allows LLM to adaptively select retrieval languages while enforcing cross-lingual consistency during reasoning, thereby mitigating language bias without modifying existing retrievers or translators. Experimental results demonstrate that our approach effectively reduces language bias in multilingual RAG and consistently outperforms baselines across multiple multilingual benchmarks.
%U https://aclanthology.org/2026.findings-acl.374/
%P 7579-7589
Markdown (Informal)
[Breaking Language Preference in Multilingual RAG via Language-Controllable Retrieval and Language-Agnostic Reasoning](https://aclanthology.org/2026.findings-acl.374/) (Huo et al., Findings 2026)
ACL
- Wenshuai Huo, Xiaocheng Feng, Baohang Li, Chengpeng Fu, Yichong Huang, Hui Wang, and Bing Qin. 2026. Breaking Language Preference in Multilingual RAG via Language-Controllable Retrieval and Language-Agnostic Reasoning. In Findings of the Association for Computational Linguistics: ACL 2026, pages 7579–7589, San Diego, California, United States. Association for Computational Linguistics.