@inproceedings{huang-etal-2025-confidence,
title = "Confidence-Based Response Abstinence: Improving {LLM} Trustworthiness via Activation-Based Uncertainty Estimation",
author = "Huang, Zhiqi and
Datla, Vivek and
Zhu, Chenyang and
Samuel, Alfy and
Liu, Daben and
Kumar, Anoop and
Soni, Ritesh",
editor = {Eikema, Bryan and
V{\'a}zquez, Ra{\'u}l and
Berant, Jonathan and
de Marneffe, Marie-Catherine and
Plank, Barbara and
Shelmanov, Artem and
Swayamdipta, Swabha and
Tiedemann, J{\"o}rg and
Zerva, Chrysoula and
Aziz, Wilker},
booktitle = "Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.uncertainlp-main.16/",
doi = "10.18653/v1/2025.uncertainlp-main.16",
pages = "184--193",
ISBN = "979-8-89176-349-4",
abstract = "We propose a method for confidence estimation in retrieval-augmented generation (RAG) systems that aligns closely with the correctness of large language model (LLM) outputs. Confidence estimation is especially critical in high-stakes domains such as finance and healthcare, where the cost of an incorrect answer outweighs that of not answering the question. Our approach extends prior uncertainty quantification methods by leveraging raw feed-forward network (FFN) activations as auto-regressive signals, avoiding the information loss inherent in token logits and probabilities after projection and softmax normalization. We model confidence prediction as a sequence classification task, and regularize training with a Huber loss term to improve robustness against noisy supervision. Applied in a real-world financial industry customer-support setting with complex knowledge bases, our method outperforms strong baselines and maintains high accuracy under strict latency constraints. Experiments on Llama 3.1 8B model show that using activations from only the 16th layer preserves accuracy while reducing response latency. Our results demonstrate that activation-based confidence modeling offers a scalable, architecture-aware path toward trustworthy RAG deployment."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2025-confidence">
<titleInfo>
<title>Confidence-Based Response Abstinence: Improving LLM Trustworthiness via Activation-Based Uncertainty Estimation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhiqi</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Datla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenyang</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alfy</namePart>
<namePart type="family">Samuel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daben</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bryan</namePart>
<namePart type="family">Eikema</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="family">Vázquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Berant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Shelmanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chrysoula</namePart>
<namePart type="family">Zerva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wilker</namePart>
<namePart type="family">Aziz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-349-4</identifier>
</relatedItem>
<abstract>We propose a method for confidence estimation in retrieval-augmented generation (RAG) systems that aligns closely with the correctness of large language model (LLM) outputs. Confidence estimation is especially critical in high-stakes domains such as finance and healthcare, where the cost of an incorrect answer outweighs that of not answering the question. Our approach extends prior uncertainty quantification methods by leveraging raw feed-forward network (FFN) activations as auto-regressive signals, avoiding the information loss inherent in token logits and probabilities after projection and softmax normalization. We model confidence prediction as a sequence classification task, and regularize training with a Huber loss term to improve robustness against noisy supervision. Applied in a real-world financial industry customer-support setting with complex knowledge bases, our method outperforms strong baselines and maintains high accuracy under strict latency constraints. Experiments on Llama 3.1 8B model show that using activations from only the 16th layer preserves accuracy while reducing response latency. Our results demonstrate that activation-based confidence modeling offers a scalable, architecture-aware path toward trustworthy RAG deployment.</abstract>
<identifier type="citekey">huang-etal-2025-confidence</identifier>
<identifier type="doi">10.18653/v1/2025.uncertainlp-main.16</identifier>
<location>
<url>https://aclanthology.org/2025.uncertainlp-main.16/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>184</start>
<end>193</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Confidence-Based Response Abstinence: Improving LLM Trustworthiness via Activation-Based Uncertainty Estimation
%A Huang, Zhiqi
%A Datla, Vivek
%A Zhu, Chenyang
%A Samuel, Alfy
%A Liu, Daben
%A Kumar, Anoop
%A Soni, Ritesh
%Y Eikema, Bryan
%Y Vázquez, Raúl
%Y Berant, Jonathan
%Y de Marneffe, Marie-Catherine
%Y Plank, Barbara
%Y Shelmanov, Artem
%Y Swayamdipta, Swabha
%Y Tiedemann, Jörg
%Y Zerva, Chrysoula
%Y Aziz, Wilker
%S Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-349-4
%F huang-etal-2025-confidence
%X We propose a method for confidence estimation in retrieval-augmented generation (RAG) systems that aligns closely with the correctness of large language model (LLM) outputs. Confidence estimation is especially critical in high-stakes domains such as finance and healthcare, where the cost of an incorrect answer outweighs that of not answering the question. Our approach extends prior uncertainty quantification methods by leveraging raw feed-forward network (FFN) activations as auto-regressive signals, avoiding the information loss inherent in token logits and probabilities after projection and softmax normalization. We model confidence prediction as a sequence classification task, and regularize training with a Huber loss term to improve robustness against noisy supervision. Applied in a real-world financial industry customer-support setting with complex knowledge bases, our method outperforms strong baselines and maintains high accuracy under strict latency constraints. Experiments on Llama 3.1 8B model show that using activations from only the 16th layer preserves accuracy while reducing response latency. Our results demonstrate that activation-based confidence modeling offers a scalable, architecture-aware path toward trustworthy RAG deployment.
%R 10.18653/v1/2025.uncertainlp-main.16
%U https://aclanthology.org/2025.uncertainlp-main.16/
%U https://doi.org/10.18653/v1/2025.uncertainlp-main.16
%P 184-193
Markdown (Informal)
[Confidence-Based Response Abstinence: Improving LLM Trustworthiness via Activation-Based Uncertainty Estimation](https://aclanthology.org/2025.uncertainlp-main.16/) (Huang et al., UncertaiNLP 2025)
ACL