@inproceedings{verma-etal-2023-revisiting,
    title = "Revisiting Entropy Rate Constancy in Text",
    author = "Verma, Vivek and
      Tomlin, Nicholas and
      Klein, Dan",
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-emnlp.1039",
    doi = "10.18653/v1/2023.findings-emnlp.1039",
    pages = "15537--15549",
    abstract = "The uniform information density (UID) hypothesis states that humans tend to distribute information roughly evenly across an utterance or discourse. Early evidence in support of the UID hypothesis came from Genzel and Charniak (2002), which proposed an entropy rate constancy principle based on the probability of English text under $n$-gram language models. We re-evaluate the claims of Genzel and Charniak (2002) with neural language models, failing to find clear evidence in support of entropy rate constancy. We conduct a range of experiments across datasets, model sizes, and languages and discuss implications for the uniform information density hypothesis and linguistic theories of efficient communication more broadly.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="verma-etal-2023-revisiting">
    <titleInfo>
      <title>Revisiting Entropy Rate Constancy in Text</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Vivek</namePart>
      <namePart type="family">Verma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nicholas</namePart>
      <namePart type="family">Tomlin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dan</namePart>
      <namePart type="family">Klein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The uniform information density (UID) hypothesis states that humans tend to distribute information roughly evenly across an utterance or discourse. Early evidence in support of the UID hypothesis came from Genzel and Charniak (2002), which proposed an entropy rate constancy principle based on the probability of English text under n-gram language models. We re-evaluate the claims of Genzel and Charniak (2002) with neural language models, failing to find clear evidence in support of entropy rate constancy. We conduct a range of experiments across datasets, model sizes, and languages and discuss implications for the uniform information density hypothesis and linguistic theories of efficient communication more broadly.</abstract>
    <identifier type="citekey">verma-etal-2023-revisiting</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.1039</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.1039</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>15537</start>
        <end>15549</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Revisiting Entropy Rate Constancy in Text
%A Verma, Vivek
%A Tomlin, Nicholas
%A Klein, Dan
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F verma-etal-2023-revisiting
%X The uniform information density (UID) hypothesis states that humans tend to distribute information roughly evenly across an utterance or discourse. Early evidence in support of the UID hypothesis came from Genzel and Charniak (2002), which proposed an entropy rate constancy principle based on the probability of English text under n-gram language models. We re-evaluate the claims of Genzel and Charniak (2002) with neural language models, failing to find clear evidence in support of entropy rate constancy. We conduct a range of experiments across datasets, model sizes, and languages and discuss implications for the uniform information density hypothesis and linguistic theories of efficient communication more broadly.
%R 10.18653/v1/2023.findings-emnlp.1039
%U https://aclanthology.org/2023.findings-emnlp.1039
%U https://doi.org/10.18653/v1/2023.findings-emnlp.1039
%P 15537-15549
Markdown (Informal)
[Revisiting Entropy Rate Constancy in Text](https://aclanthology.org/2023.findings-emnlp.1039) (Verma et al., Findings 2023)

ACL
Vivek Verma, Nicholas Tomlin, and Dan Klein. 2023. Revisiting Entropy Rate Constancy in Text. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 15537–15549, Singapore. Association for Computational Linguistics.
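
The abstract names the methodology only at a high level: per-sentence entropy of text under language models, re-estimated with neural models. As a purely illustrative sketch of that kind of measurement (not the authors' code), the snippet below computes average per-sentence surprisal under GPT-2 as a function of sentence position; the model choice, the `mean_surprisal` helper, and the toy three-sentence document are assumptions made for the example.

```python
# Hedged sketch: per-sentence surprisal vs. sentence position under a neural LM.
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

def mean_surprisal(context: str, sentence: str) -> float:
    """Average surprisal (nats per token) of `sentence` given the preceding `context`."""
    # Use the EOS token as a stand-in context for the document-initial sentence.
    ctx_ids = tokenizer.encode(context) if context else [tokenizer.eos_token_id]
    sent_ids = tokenizer.encode(" " + sentence)
    input_ids = torch.tensor([ctx_ids + sent_ids])
    with torch.no_grad():
        logits = model(input_ids).logits
    # log P(token_i | tokens_<i) for every position after the first.
    log_probs = torch.log_softmax(logits[0, :-1], dim=-1)
    targets = input_ids[0, 1:]
    token_logprobs = log_probs[torch.arange(targets.size(0)), targets]
    # Keep only the tokens belonging to `sentence`, not the context.
    sentence_logprobs = token_logprobs[len(ctx_ids) - 1 :]
    return float(-sentence_logprobs.mean())

# Toy document; the paper runs this kind of measurement over full corpora.
document = [
    "The committee met on Tuesday.",
    "It reviewed the budget for the coming year.",
    "Several members raised concerns about rising costs.",
]

context = ""
for position, sentence in enumerate(document, start=1):
    print(f"sentence {position}: {mean_surprisal(context, sentence):.2f} nats/token")
    context = (context + " " + sentence).strip()
```

Under strict entropy rate constancy, these per-position averages, taken over a large corpus rather than a toy document, would stay roughly flat as sentence position increases; the paper reports failing to find clear evidence of that pattern with neural language models.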