@inproceedings{qu-etal-2025-uplift,
title = "Uplift-{RAG}: Uplift-Driven Knowledge Preference Alignment for Retrieval-Augmented Generation",
author = "Qu, Changle and
Dai, Sunhao and
Cai, Hengyi and
Cheng, Yiyang and
Xu, Jun and
Wang, Shuaiqiang and
Yin, Dawei",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.511/",
pages = "9632--9644",
ISBN = "979-8-89176-335-7",
abstract = "Retrieval-augmented generation (RAG) has proven effective in enhancing the knowledge coverage of large language models (LLMs) and mitigating hallucinations by incorporating external retrieved documents. However, documents deemed relevant by the retriever are not necessarily helpful for answer generation, and including misleading information can even degrade performance. Existing efforts to estimate document utility often rely on the downstream generation performance, which conflates the influence of external documents with the intrinsic knowledge of the LLM, thereby obscuring the actual contribution of the retrieved content. To address this, this paper proposes Uplit-RAG, a uplift-driven knowledge preference alignment framework for RAG. Specifically, we first propose an uplift-based definition of document utility that quantifies each document{'}s marginal benefit over the LLM{'}s internal knowledge. We then optimize the reranker with three alignment objectives to identify and prioritize documents based on their uplift. This enables dynamic selection of documents that address the LLM{'}s knowledge gaps, going beyond fixed top-k selection, while reducing reference redundancy and the computational overhead of the LLM{'}s input. Extensive experiments demonstrate the effectiveness of Uplift-RAG."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qu-etal-2025-uplift">
<titleInfo>
<title>Uplift-RAG: Uplift-Driven Knowledge Preference Alignment for Retrieval-Augmented Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Changle</namePart>
<namePart type="family">Qu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunhao</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hengyi</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiyang</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuaiqiang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dawei</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Retrieval-augmented generation (RAG) has proven effective in enhancing the knowledge coverage of large language models (LLMs) and mitigating hallucinations by incorporating external retrieved documents. However, documents deemed relevant by the retriever are not necessarily helpful for answer generation, and including misleading information can even degrade performance. Existing efforts to estimate document utility often rely on the downstream generation performance, which conflates the influence of external documents with the intrinsic knowledge of the LLM, thereby obscuring the actual contribution of the retrieved content. To address this, this paper proposes Uplit-RAG, a uplift-driven knowledge preference alignment framework for RAG. Specifically, we first propose an uplift-based definition of document utility that quantifies each document’s marginal benefit over the LLM’s internal knowledge. We then optimize the reranker with three alignment objectives to identify and prioritize documents based on their uplift. This enables dynamic selection of documents that address the LLM’s knowledge gaps, going beyond fixed top-k selection, while reducing reference redundancy and the computational overhead of the LLM’s input. Extensive experiments demonstrate the effectiveness of Uplift-RAG.</abstract>
<identifier type="citekey">qu-etal-2025-uplift</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.511/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>9632</start>
<end>9644</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uplift-RAG: Uplift-Driven Knowledge Preference Alignment for Retrieval-Augmented Generation
%A Qu, Changle
%A Dai, Sunhao
%A Cai, Hengyi
%A Cheng, Yiyang
%A Xu, Jun
%A Wang, Shuaiqiang
%A Yin, Dawei
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F qu-etal-2025-uplift
%X Retrieval-augmented generation (RAG) has proven effective in enhancing the knowledge coverage of large language models (LLMs) and mitigating hallucinations by incorporating external retrieved documents. However, documents deemed relevant by the retriever are not necessarily helpful for answer generation, and including misleading information can even degrade performance. Existing efforts to estimate document utility often rely on the downstream generation performance, which conflates the influence of external documents with the intrinsic knowledge of the LLM, thereby obscuring the actual contribution of the retrieved content. To address this, this paper proposes Uplit-RAG, a uplift-driven knowledge preference alignment framework for RAG. Specifically, we first propose an uplift-based definition of document utility that quantifies each document’s marginal benefit over the LLM’s internal knowledge. We then optimize the reranker with three alignment objectives to identify and prioritize documents based on their uplift. This enables dynamic selection of documents that address the LLM’s knowledge gaps, going beyond fixed top-k selection, while reducing reference redundancy and the computational overhead of the LLM’s input. Extensive experiments demonstrate the effectiveness of Uplift-RAG.
%U https://aclanthology.org/2025.findings-emnlp.511/
%P 9632-9644
Markdown (Informal)
[Uplift-RAG: Uplift-Driven Knowledge Preference Alignment for Retrieval-Augmented Generation](https://aclanthology.org/2025.findings-emnlp.511/) (Qu et al., Findings 2025)
ACL