@inproceedings{jiang-liang-2026-capturing,
title = "Capturing Epistemic Uncertainty in {LLM}-Based Soft Labeling",
author = "Jiang, Yanru and
Liang, Siyu",
editor = "Mille, Simon and
Gehrmann, Sebastian and
Schmidtov{\'a}, Patr{\'i}cia and
Du{\v{s}}ek, Ond{\v{r}}ej and
Fadaee, Marzieh and
Lo, Kyle and
Santus, Enrico and
Stanovsky, Gabriel",
booktitle = "Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics ({GEM})",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.gem-main.21/",
pages = "177--190",
ISBN = "979-8-89176-423-1",
abstract = "In many human-annotated NLP tasks involving ambiguity or subjective judgment, annotator disagreement reflects epistemic uncertainty rather than noise. Soft labeling (SL), which represents annotations as probability distributions rather than majority-vote (MV) labels, preserves this uncertainty and can improve downstream performance. We extend this perspective to LLM-based annotation by formalizing LLM soft labeling as introducing controlled variation in model-generated annotations to approximate the latent variability underlying human annotations. We distinguish two sources of variation: model-induced (e.g., stochastic decoding and model ensembles) and human-approximated (e.g., persona prompting and human-calibrated in-context annotation). Using the Gab Hate and GoEmotions datasets, we show that SL training consistently outperforms MV training under stronger LLM-based annotation strategies. Model ensembles produce the most informative soft-label distributions, achieving the best human{--}LLM agreement and downstream classification performance. These findings suggest that scalable LLM-based annotation pipelines can model epistemic uncertainty through diverse model-level variation without explicitly simulating human attributes."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jiang-liang-2026-capturing">
<titleInfo>
<title>Capturing Epistemic Uncertainty in LLM-Based Soft Labeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanru</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyu</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics (GEM)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Gehrmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrícia</namePart>
<namePart type="family">Schmidtová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marzieh</namePart>
<namePart type="family">Fadaee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Lo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Stanovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-423-1</identifier>
</relatedItem>
<abstract>In many human-annotated NLP tasks involving ambiguity or subjective judgment, annotator disagreement reflects epistemic uncertainty rather than noise. Soft labeling (SL), which represents annotations as probability distributions rather than majority-vote (MV) labels, preserves this uncertainty and can improve downstream performance. We extend this perspective to LLM-based annotation by formalizing LLM soft labeling as introducing controlled variation in model-generated annotations to approximate the latent variability underlying human annotations. We distinguish two sources of variation: model-induced (e.g., stochastic decoding and model ensembles) and human-approximated (e.g., persona prompting and human-calibrated in-context annotation). Using the Gab Hate and GoEmotions datasets, we show that SL training consistently outperforms MV training under stronger LLM-based annotation strategies. Model ensembles produce the most informative soft-label distributions, achieving the best human–LLM agreement and downstream classification performance. These findings suggest that scalable LLM-based annotation pipelines can model epistemic uncertainty through diverse model-level variation without explicitly simulating human attributes.</abstract>
<identifier type="citekey">jiang-liang-2026-capturing</identifier>
<location>
<url>https://aclanthology.org/2026.gem-main.21/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>177</start>
<end>190</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Capturing Epistemic Uncertainty in LLM-Based Soft Labeling
%A Jiang, Yanru
%A Liang, Siyu
%Y Mille, Simon
%Y Gehrmann, Sebastian
%Y Schmidtová, Patrícia
%Y Dušek, Ondřej
%Y Fadaee, Marzieh
%Y Lo, Kyle
%Y Santus, Enrico
%Y Stanovsky, Gabriel
%S Proceedings of the Fifth Workshop on Generation, Evaluation and Metrics (GEM)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-423-1
%F jiang-liang-2026-capturing
%X In many human-annotated NLP tasks involving ambiguity or subjective judgment, annotator disagreement reflects epistemic uncertainty rather than noise. Soft labeling (SL), which represents annotations as probability distributions rather than majority-vote (MV) labels, preserves this uncertainty and can improve downstream performance. We extend this perspective to LLM-based annotation by formalizing LLM soft labeling as introducing controlled variation in model-generated annotations to approximate the latent variability underlying human annotations. We distinguish two sources of variation: model-induced (e.g., stochastic decoding and model ensembles) and human-approximated (e.g., persona prompting and human-calibrated in-context annotation). Using the Gab Hate and GoEmotions datasets, we show that SL training consistently outperforms MV training under stronger LLM-based annotation strategies. Model ensembles produce the most informative soft-label distributions, achieving the best human–LLM agreement and downstream classification performance. These findings suggest that scalable LLM-based annotation pipelines can model epistemic uncertainty through diverse model-level variation without explicitly simulating human attributes.
%U https://aclanthology.org/2026.gem-main.21/
%P 177-190
Markdown (Informal)
[Capturing Epistemic Uncertainty in LLM-Based Soft Labeling](https://aclanthology.org/2026.gem-main.21/) (Jiang & Liang, GEM 2026)
ACL