@inproceedings{sen-2020-speech,
title = "Speech Disfluencies occur at Higher Perplexities",
author = "Sen, Priyanka",
editor = "Zock, Michael and
Chersoni, Emmanuele and
Lenci, Alessandro and
Santus, Enrico",
booktitle = "Proceedings of the Workshop on the Cognitive Aspects of the Lexicon",
month = dec,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.cogalex-1.11",
pages = "92--97",
abstract = "Speech disfluencies have been hypothesized to occur before words that are less predictable and therefore more cognitively demanding. In this paper, we revisit this hypothesis by using OpenAI{'}s GPT-2 to calculate predictability of words as language model perplexity. Using the Switchboard corpus, we find that 51{\%} of disfluencies occur at the highest, second highest, or within one token of the highest perplexity, and this distribution is not random. We also show that disfluencies precede words with significantly higher perplexity than fluent contexts. Based on our results, we offer new evidence that disfluencies are more likely to occur before less predictable words.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sen-2020-speech">
<titleInfo>
<title>Speech Disfluencies occur at Higher Perplexities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Priyanka</namePart>
<namePart type="family">Sen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on the Cognitive Aspects of the Lexicon</title>
</titleInfo>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Zock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuele</namePart>
<namePart type="family">Chersoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Speech disfluencies have been hypothesized to occur before words that are less predictable and therefore more cognitively demanding. In this paper, we revisit this hypothesis by using OpenAI’s GPT-2 to calculate predictability of words as language model perplexity. Using the Switchboard corpus, we find that 51% of disfluencies occur at the highest, second highest, or within one token of the highest perplexity, and this distribution is not random. We also show that disfluencies precede words with significantly higher perplexity than fluent contexts. Based on our results, we offer new evidence that disfluencies are more likely to occur before less predictable words.</abstract>
<identifier type="citekey">sen-2020-speech</identifier>
<location>
<url>https://aclanthology.org/2020.cogalex-1.11</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>92</start>
<end>97</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Disfluencies occur at Higher Perplexities
%A Sen, Priyanka
%Y Zock, Michael
%Y Chersoni, Emmanuele
%Y Lenci, Alessandro
%Y Santus, Enrico
%S Proceedings of the Workshop on the Cognitive Aspects of the Lexicon
%D 2020
%8 December
%I Association for Computational Linguistics
%C Online
%F sen-2020-speech
%X Speech disfluencies have been hypothesized to occur before words that are less predictable and therefore more cognitively demanding. In this paper, we revisit this hypothesis by using OpenAI’s GPT-2 to calculate predictability of words as language model perplexity. Using the Switchboard corpus, we find that 51% of disfluencies occur at the highest, second highest, or within one token of the highest perplexity, and this distribution is not random. We also show that disfluencies precede words with significantly higher perplexity than fluent contexts. Based on our results, we offer new evidence that disfluencies are more likely to occur before less predictable words.
%U https://aclanthology.org/2020.cogalex-1.11
%P 92-97
Markdown (Informal)
[Speech Disfluencies occur at Higher Perplexities](https://aclanthology.org/2020.cogalex-1.11) (Sen, CogALex 2020)
ACL