@inproceedings{cao-etal-2026-semantic,
title = "Semantic Token Clustering for Efficient Uncertainty Quantification in Large Language Models",
author = "Cao, Qi and
Gambardella, Andrew and
Kojima, Takeshi and
Matsuo, Yutaka and
Iwasawa, Yusuke",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 2: Short Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-short.49/",
pages = "682--696",
isbn = "979-8-89176-381-4",
abstract = "Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse tasks. However, their limited truthfulness and tendency toward overconfidence constrain their reliability in factual tasks. Uncertainty quantification offers a promising approach to identifying potentially unreliable outputs from LLMs. Yet most existing methods rely on repeated sampling or auxiliary models, which substantially increase computational overhead. To address these limitations, we propose an efficient uncertainty quantification method that leverages semantic information inherently encoded in LLMs. Specifically, we group tokens into semantically consistent clusters based on embedding clustering and prefix matching, and compute a cluster-based score at each decoding step to represent uncertainty. Our approach requires only a single generation and does not depend on any auxiliary models. Experiments on multiple datasets and models demonstrate that our method achieves performance comparable to existing baselines while substantially reducing computational overhead."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cao-etal-2026-semantic">
<titleInfo>
<title>Semantic Token Clustering for Efficient Uncertainty Quantification in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Gambardella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Takeshi</namePart>
<namePart type="family">Kojima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yutaka</namePart>
<namePart type="family">Matsuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Iwasawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-381-4</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse tasks. However, their limited truthfulness and tendency toward overconfidence constrain their reliability in factual tasks. Uncertainty quantification offers a promising approach to identifying potentially unreliable outputs from LLMs. Yet most existing methods rely on repeated sampling or auxiliary models, which substantially increase computational overhead. To address these limitations, we propose an efficient uncertainty quantification method that leverages semantic information inherently encoded in LLMs. Specifically, we group tokens into semantically consistent clusters based on embedding clustering and prefix matching, and compute a cluster-based score at each decoding step to represent uncertainty. Our approach requires only a single generation and does not depend on any auxiliary models. Experiments on multiple datasets and models demonstrate that our method achieves performance comparable to existing baselines while substantially reducing computational overhead.</abstract>
<identifier type="citekey">cao-etal-2026-semantic</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-short.49/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>682</start>
<end>696</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Semantic Token Clustering for Efficient Uncertainty Quantification in Large Language Models
%A Cao, Qi
%A Gambardella, Andrew
%A Kojima, Takeshi
%A Matsuo, Yutaka
%A Iwasawa, Yusuke
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-381-4
%F cao-etal-2026-semantic
%X Large Language Models (LLMs) have demonstrated remarkable capabilities across diverse tasks. However, their limited truthfulness and tendency toward overconfidence constrain their reliability in factual tasks. Uncertainty quantification offers a promising approach to identifying potentially unreliable outputs from LLMs. Yet most existing methods rely on repeated sampling or auxiliary models, which substantially increase computational overhead. To address these limitations, we propose an efficient uncertainty quantification method that leverages semantic information inherently encoded in LLMs. Specifically, we group tokens into semantically consistent clusters based on embedding clustering and prefix matching, and compute a cluster-based score at each decoding step to represent uncertainty. Our approach requires only a single generation and does not depend on any auxiliary models. Experiments on multiple datasets and models demonstrate that our method achieves performance comparable to existing baselines while substantially reducing computational overhead.
%U https://aclanthology.org/2026.eacl-short.49/
%P 682-696
Markdown (Informal)
[Semantic Token Clustering for Efficient Uncertainty Quantification in Large Language Models](https://aclanthology.org/2026.eacl-short.49/) (Cao et al., EACL 2026)
ACL