% BibTeX record for ACL Anthology paper 2026.codi-1.5 (CODI-CRAC 2026).
% The abstract's inline math uses \text{...}, which needs amsmath/amstext if the field is ever typeset.
@inproceedings{santos-2026-speech,
  title     = {Speech Disfluencies and {LLM} Confidence: Length Bias and Pragmatic Insensitivity in {Brazilian} {Portuguese}},
  author    = {Santos, Valeria},
  editor    = {Braud, Chlo{\'e} and
               Hardmeier, Christian and
               Ogrodniczuk, Maciej and
               Loaiciga, Sharid and
               Zeldes, Amir and
               Nov{\'a}k, Michal and
               Li, Chuyuan and
               Strube, Michael and
               Li, Junyi Jessy},
  booktitle = {Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference ({CODI}-{CRAC} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.codi-1.5/},
  pages     = {24--28},
  isbn      = {979-8-89176-400-2},
  abstract  = {Training Large Language Models (LLMs) relies predominantly on written, curated corpora, which may limit their reliability on spontaneous speech. Oral language exhibits real-time planning markers --- filled pauses, repetitions, false starts, and vowel lengthenings --- that modulate epistemic commitment. This pilot study investigates how such disfluencies affect the alignment between LLM confidence and a discourse-pragmatic uncertainty proxy in a Portuguese model (Llama-3.1-8B-Instruct). Using a benchmark of 344 turns from the Roda Viva corpus, we contrast faithful Conversation Analysis transcriptions with sanitized versions and combine binned divergence metrics (ECE, OE) with rank correlation and multivariate regression analyses. We find that model confidence is overwhelmingly driven by a surface feature --- turn length ($\beta_{\text{std}} = +14.47, p < 0.001$) --- rather than by pragmatic markers of uncertainty ($\beta_{\text{oral}} = -3.09, \beta_{\text{hedges}} = -0.97$, both non-significant; $R^2 = 0.29$). After controlling for length, residual effects of disfluency markers align in the human-expected direction but are dwarfed by length bias. We argue that this surface-feature dominance subsumes the pragmatic blindness phenomenon and explains the substantial divergence observed via ECE (41.95) and OE (4.29) between faithful and sanitized conditions.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="santos-2026-speech">
<titleInfo>
<title>Speech Disfluencies and LLM Confidence: Length Bias and Pragmatic Insensitivity in Brazilian Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valeria</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference (CODI-CRAC 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chloé</namePart>
<namePart type="family">Braud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharid</namePart>
<namePart type="family">Loaiciga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Novák</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuyuan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Strube</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-400-2</identifier>
</relatedItem>
<abstract>Training Large Language Models (LLMs) relies predominantly on written, curated corpora, which may limit their reliability on spontaneous speech. Oral language exhibits real-time planning markers — filled pauses, repetitions, false starts, and vowel lengthenings — that modulate epistemic commitment. This pilot study investigates how such disfluencies affect the alignment between LLM confidence and a discourse-pragmatic uncertainty proxy in a Portuguese model (Llama-3.1-8B-Instruct). Using a benchmark of 344 turns from the Roda Viva corpus, we contrast faithful Conversation Analysis transcriptions with sanitized versions and combine binned divergence metrics (ECE, OE) with rank correlation and multivariate regression analyses. We find that model confidence is overwhelmingly driven by a surface feature — turn length ($\beta_{\text{std}} = +14.47, p &lt; 0.001$) — rather than by pragmatic markers of uncertainty ($\beta_{\text{oral}} = -3.09, \beta_{\text{hedges}} = -0.97$, both non-significant; $R^2 = 0.29$). After controlling for length, residual effects of disfluency markers align in the human-expected direction but are dwarfed by length bias. We argue that this surface-feature dominance subsumes the pragmatic blindness phenomenon and explains the substantial divergence observed via ECE (41.95) and OE (4.29) between faithful and sanitized conditions.</abstract>
<identifier type="citekey">santos-2026-speech</identifier>
<location>
<url>https://aclanthology.org/2026.codi-1.5/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>24</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Disfluencies and LLM Confidence: Length Bias and Pragmatic Insensitivity in Brazilian Portuguese
%A Santos, Valeria
%Y Braud, Chloé
%Y Hardmeier, Christian
%Y Ogrodniczuk, Maciej
%Y Loaiciga, Sharid
%Y Zeldes, Amir
%Y Novák, Michal
%Y Li, Chuyuan
%Y Strube, Michael
%Y Li, Junyi Jessy
%S Proceedings of the 2nd Joint Workshop on Computational Approaches to Discourse, Context and Document-Level Inferences and Computational Models of Reference, Anaphora and Coreference (CODI-CRAC 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-400-2
%F santos-2026-speech
%X Training Large Language Models (LLMs) relies predominantly on written, curated corpora, which may limit their reliability on spontaneous speech. Oral language exhibits real-time planning markers — filled pauses, repetitions, false starts, and vowel lengthenings — that modulate epistemic commitment. This pilot study investigates how such disfluencies affect the alignment between LLM confidence and a discourse-pragmatic uncertainty proxy in a Portuguese model (Llama-3.1-8B-Instruct). Using a benchmark of 344 turns from the Roda Viva corpus, we contrast faithful Conversation Analysis transcriptions with sanitized versions and combine binned divergence metrics (ECE, OE) with rank correlation and multivariate regression analyses. We find that model confidence is overwhelmingly driven by a surface feature — turn length ($\beta_{\text{std}} = +14.47, p < 0.001$) — rather than by pragmatic markers of uncertainty ($\beta_{\text{oral}} = -3.09, \beta_{\text{hedges}} = -0.97$, both non-significant; $R^2 = 0.29$). After controlling for length, residual effects of disfluency markers align in the human-expected direction but are dwarfed by length bias. We argue that this surface-feature dominance subsumes the pragmatic blindness phenomenon and explains the substantial divergence observed via ECE (41.95) and OE (4.29) between faithful and sanitized conditions.
%U https://aclanthology.org/2026.codi-1.5/
%P 24-28
Markdown (Informal)
[Speech Disfluencies and LLM Confidence: Length Bias and Pragmatic Insensitivity in Brazilian Portuguese](https://aclanthology.org/2026.codi-1.5/) (Santos, CODI-CRAC 2026)
ACL