% ACL 2026 long paper (Volume 1) on how information is split between prosody and text.
% Abstract sentence spacing restored; entry terminator separated from the XML record that follows.
@inproceedings{yadavalli-etal-2026-prosody,
    title = "What Do Prosody and Text Convey? Characterizing How Meaningful Information is Distributed Across Multiple Channels",
    author = "Yadavalli, Aditya and
      Pimentel, Tiago and
      Regev, Tamar I. and
      Wilcox, Ethan Gotlieb and
      Warstadt, Alex",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-long.1085/",
    pages = "23665--23679",
    isbn = "979-8-89176-390-6",
    abstract = "Prosody{---}the melody of speech{---}conveys critical information often not captured by the words or text of a message. In this paper, we propose an information-theoretic approach to quantify how much is conveyed by prosody that is not recoverable from text alone, and, crucially, what prosody conveys. Our approach applies large speech and language models to estimate the mutual information between a particular dimension of an utterance{'}s meaning (e.g., its emotion) and any of its communication channels (e.g., audio or text). We then use this approach to quantify the information conveyed by audio and text about sarcasm, emotion, and questionhood, using speech from television and podcasts. We find that for sarcasm and emotion, the audio channel, and by implication the prosodic channel, transmits over an order of magnitude more information about these features than the text channel alone, at least when long-term context beyond the current sentence is unavailable. For questionhood, prosody provides comparatively less additional information. We conclude by outlining a program applying our approach to more dimensions of meaning, communication channels, and languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for one conference paper: paper-level metadata first,
       then the host proceedings volume inside relatedItem type="host". -->
  <mods ID="yadavalli-etal-2026-prosody">
    <titleInfo>
      <title>What Do Prosody and Text Convey? Characterizing How Meaningful Information is Distributed Across Multiple Channels</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Aditya</namePart>
      <namePart type="family">Yadavalli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tiago</namePart>
      <namePart type="family">Pimentel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tamar</namePart>
      <namePart type="given">I</namePart>
      <namePart type="family">Regev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ethan</namePart>
      <namePart type="given">Gotlieb</namePart>
      <namePart type="family">Wilcox</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alex</namePart>
      <namePart type="family">Warstadt</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <!-- Abstract: spaces between sentences restored. -->
    <abstract>Prosody—the melody of speech—conveys critical information often not captured by the words or text of a message. In this paper, we propose an information-theoretic approach to quantify how much is conveyed by prosody that is not recoverable from text alone, and, crucially, what prosody conveys. Our approach applies large speech and language models to estimate the mutual information between a particular dimension of an utterance’s meaning (e.g., its emotion) and any of its communication channels (e.g., audio or text). We then use this approach to quantify the information conveyed by audio and text about sarcasm, emotion, and questionhood, using speech from television and podcasts. We find that for sarcasm and emotion, the audio channel, and by implication the prosodic channel, transmits over an order of magnitude more information about these features than the text channel alone, at least when long-term context beyond the current sentence is unavailable. For questionhood, prosody provides comparatively less additional information. We conclude by outlining a program applying our approach to more dimensions of meaning, communication channels, and languages.</abstract>
    <identifier type="citekey">yadavalli-etal-2026-prosody</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1085/</url>
    </location>
    <!-- Position of the paper within the host volume (page extent). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>23665</start>
        <end>23679</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T What Do Prosody and Text Convey? Characterizing How Meaningful Information is Distributed Across Multiple Channels
%A Yadavalli, Aditya
%A Pimentel, Tiago
%A Regev, Tamar I.
%A Wilcox, Ethan Gotlieb
%A Warstadt, Alex
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F yadavalli-etal-2026-prosody
%X Prosody—the melody of speech—conveys critical information often not captured by the words or text of a message. In this paper, we propose an information-theoretic approach to quantify how much is conveyed by prosody that is not recoverable from text alone, and, crucially, what prosody conveys. Our approach applies large speech and language models to estimate the mutual information between a particular dimension of an utterance’s meaning (e.g., its emotion) and any of its communication channels (e.g., audio or text). We then use this approach to quantify the information conveyed by audio and text about sarcasm, emotion, and questionhood, using speech from television and podcasts. We find that for sarcasm and emotion, the audio channel, and by implication the prosodic channel, transmits over an order of magnitude more information about these features than the text channel alone, at least when long-term context beyond the current sentence is unavailable. For questionhood, prosody provides comparatively less additional information. We conclude by outlining a program applying our approach to more dimensions of meaning, communication channels, and languages.
%U https://aclanthology.org/2026.acl-long.1085/
%P 23665-23679
Markdown (Informal)
[What Do Prosody and Text Convey? Characterizing How Meaningful Information is Distributed Across Multiple Channels](https://aclanthology.org/2026.acl-long.1085/) (Yadavalli et al., ACL 2026)
ACL