@inproceedings{dyer-2026-surprisal,
title = "What does Surprisal have to do with Information Status?",
author = "Dyer, Andrew Thomas",
editor = "Vylomova, Ekaterina and
Shcherbakov, Andrei and
Rani, Priya",
booktitle = "Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual {NLP}",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.sigtyp-main.4/",
pages = "26--31",
ISBN = "979-8-89176-374-6",
abstract = "It is common in cognitive computational linguistics to use language model surprisal as a measure of the information content of units in language production. From here, it is tempting to then apply this to information structure and status, considering surprising mentions to be \textit{new} and unsurprising ones to be given, providing us with a ready-made continuous metric of information givenness/newness. To see if this conflation is appropriate, we perform regression experiments to see if language model surprisal is actually well predicted by information status as manually annotated, and if so, if this effect is separable from more trivial linguistic information such as parts of speech and word frequency. We find that information status alone is at best a very weak predictor of surprisal, and that surprisal can be much better predicted by the effect of parts of speech, which are highly correlated with both information status and surprisal; and word frequency. We conclude that surprisal should not be used as a continuous representation of information status by itself."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dyer-2026-surprisal">
<titleInfo>
<title>What does Surprisal have to do with Information Status?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="given">Thomas</namePart>
<namePart type="family">Dyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei</namePart>
<namePart type="family">Shcherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priya</namePart>
<namePart type="family">Rani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-374-6</identifier>
</relatedItem>
<abstract>It is common in cognitive computational linguistics to use language model surprisal as a measure of the information content of units in language production. From here, it is tempting to then apply this to information structure and status, considering surprising mentions to be new and unsurprising ones to be given, providing us with a ready-made continuous metric of information givenness/newness. To see if this conflation is appropriate, we perform regression experiments to see if language model surprisal is actually well predicted by information status as manually annotated, and if so, if this effect is separable from more trivial linguistic information such as parts of speech and word frequency. We find that information status alone is at best a very weak predictor of surprisal, and that surprisal can be much better predicted by the effect of parts of speech, which are highly correlated with both information status and surprisal; and word frequency. We conclude that surprisal should not be used as a continuous representation of information status by itself.</abstract>
<identifier type="citekey">dyer-2026-surprisal</identifier>
<location>
<url>https://aclanthology.org/2026.sigtyp-main.4/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>26</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What does Surprisal have to do with Information Status?
%A Dyer, Andrew Thomas
%Y Vylomova, Ekaterina
%Y Shcherbakov, Andrei
%Y Rani, Priya
%S Proceedings of the 8th Workshop on Research in Computational Linguistic Typology and Multilingual NLP
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-374-6
%F dyer-2026-surprisal
%X It is common in cognitive computational linguistics to use language model surprisal as a measure of the information content of units in language production. From here, it is tempting to then apply this to information structure and status, considering surprising mentions to be new and unsurprising ones to be given, providing us with a ready-made continuous metric of information givenness/newness. To see if this conflation is appropriate, we perform regression experiments to see if language model surprisal is actually well predicted by information status as manually annotated, and if so, if this effect is separable from more trivial linguistic information such as parts of speech and word frequency. We find that information status alone is at best a very weak predictor of surprisal, and that surprisal can be much better predicted by the effect of parts of speech, which are highly correlated with both information status and surprisal; and word frequency. We conclude that surprisal should not be used as a continuous representation of information status by itself.
%U https://aclanthology.org/2026.sigtyp-main.4/
%P 26-31
Markdown (Informal)
[What does Surprisal have to do with Information Status?](https://aclanthology.org/2026.sigtyp-main.4/) (Dyer, SIGTYP 2026)
ACL