@inproceedings{ryu-lewis-2021-accounting,
title = "Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention",
author = "Ryu, Soo Hyun and
Lewis, Richard",
editor = "Chersoni, Emmanuele and
Hollenstein, Nora and
Jacobs, Cassandra and
Oseki, Yohei and
Pr{\'e}vot, Laurent and
Santus, Enrico",
booktitle = "Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.cmcl-1.6",
doi = "10.18653/v1/2021.cmcl-1.6",
pages = "61--71",
abstract = "We advance a novel explanation of similarity-based interference effects in subject-verb and reflexive pronoun agreement processing, grounded in surprisal values computed from a pretrained large-scale Transformer model, GPT-2. Specifically, we show that surprisal of the verb or reflexive pronoun predicts facilitatory interference effects in ungrammatical sentences, where a distractor noun that matches in number with the verb or pronoun leads to faster reading times, despite the distractor not participating in the agreement relation. We review the human empirical evidence for such effects, including recent meta-analyses and large-scale studies. We also show that attention patterns (indexed by entropy and other measures) in the Transformer show patterns of diffuse attention in the presence of similar distractors, consistent with cue-based retrieval models of parsing. But in contrast to these models, the attentional cues and memory representations are learned entirely from the simple self-supervised task of predicting the next word.",
}
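
The abstract reports surprisal values computed from pretrained GPT-2. As a minimal sketch of that measure (an illustration only, not the authors' released code; the model/tokenizer names are the standard Hugging Face checkpoints, and the example sentences are generic agreement-interference items in the style of the literature, not the paper's stimuli):

import math
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

def surprisal_at(sentence: str, target: str) -> float:
    """Surprisal (bits) of the first subword of `target`, given its left context."""
    context, _, _ = sentence.partition(target)
    ctx_ids = tokenizer(context.rstrip(), return_tensors="pt").input_ids
    # GPT-2's BPE treats a leading space as part of the token.
    tgt_id = tokenizer(" " + target).input_ids[0]
    with torch.no_grad():
        logits = model(ctx_ids).logits[0, -1]          # next-token distribution
    log_p = torch.log_softmax(logits, dim=-1)[tgt_id]  # log-prob of target (nats)
    return -log_p.item() / math.log(2)                 # convert to bits

# Hypothetical items (not the paper's stimuli): ungrammatical "were" after a
# mismatching vs. a number-matching distractor. Facilitatory interference, as
# described in the abstract, predicts the second surprisal is lower.
print(surprisal_at("The key to the cabinet were rusty.", "were"))
print(surprisal_at("The key to the cabinets were rusty.", "were"))
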
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ryu-lewis-2021-accounting">
<titleInfo>
<title>Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention</title>
</titleInfo>
<name type="personal">
<namePart type="given">Soo</namePart>
<namePart type="given">Hyun</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emmanuele</namePart>
<namePart type="family">Chersoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Hollenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cassandra</namePart>
<namePart type="family">Jacobs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohei</namePart>
<namePart type="family">Oseki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Prévot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We advance a novel explanation of similarity-based interference effects in subject-verb and reflexive pronoun agreement processing, grounded in surprisal values computed from a pretrained large-scale Transformer model, GPT-2. Specifically, we show that surprisal of the verb or reflexive pronoun predicts facilitatory interference effects in ungrammatical sentences, where a distractor noun that matches in number with the verb or pronoun leads to faster reading times, despite the distractor not participating in the agreement relation. We review the human empirical evidence for such effects, including recent meta-analyses and large-scale studies. We also show that attention patterns (indexed by entropy and other measures) in the Transformer show patterns of diffuse attention in the presence of similar distractors, consistent with cue-based retrieval models of parsing. But in contrast to these models, the attentional cues and memory representations are learned entirely from the simple self-supervised task of predicting the next word.</abstract>
<identifier type="citekey">ryu-lewis-2021-accounting</identifier>
<identifier type="doi">10.18653/v1/2021.cmcl-1.6</identifier>
<location>
<url>https://aclanthology.org/2021.cmcl-1.6</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>61</start>
<end>71</end>
</extent>
</part>
</mods>
</modsCollection>
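
The abstract also indexes attention patterns by entropy. A companion sketch under the same caveats (illustrative only; the layer index is an arbitrary choice and the items are hypothetical, not taken from the paper) computes the Shannon entropy of the attention distribution from the critical verb's position:

import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
model.eval()

def attention_entropy(sentence: str, target: str, layer: int = 5) -> float:
    """Mean Shannon entropy (bits), across heads, of attention from `target`."""
    enc = tokenizer(sentence, return_tensors="pt")
    tgt_id = tokenizer(" " + target).input_ids[0]      # leading space: GPT-2 BPE
    pos = enc.input_ids[0].tolist().index(tgt_id)      # first occurrence of target
    with torch.no_grad():
        out = model(**enc, output_attentions=True)
    # out.attentions: one (batch, heads, seq, seq) tensor per layer; causal
    # masking puts zero mass on positions after `pos`.
    a = out.attentions[layer][0, :, pos, : pos + 1]    # (heads, context length)
    ent = -(a * torch.log2(a.clamp_min(1e-12))).sum(dim=-1)  # entropy per head
    return ent.mean().item()

# Diffuse attention under similarity, per the abstract: a number-matching
# distractor ("cabinets ... were") should yield higher entropy than a
# mismatching one.
print(attention_entropy("The key to the cabinet were rusty.", "were"))
print(attention_entropy("The key to the cabinets were rusty.", "were"))
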
%0 Conference Proceedings
%T Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention
%A Ryu, Soo Hyun
%A Lewis, Richard
%Y Chersoni, Emmanuele
%Y Hollenstein, Nora
%Y Jacobs, Cassandra
%Y Oseki, Yohei
%Y Prévot, Laurent
%Y Santus, Enrico
%S Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F ryu-lewis-2021-accounting
%X We advance a novel explanation of similarity-based interference effects in subject-verb and reflexive pronoun agreement processing, grounded in surprisal values computed from a pretrained large-scale Transformer model, GPT-2. Specifically, we show that surprisal of the verb or reflexive pronoun predicts facilitatory interference effects in ungrammatical sentences, where a distractor noun that matches in number with the verb or pronoun leads to faster reading times, despite the distractor not participating in the agreement relation. We review the human empirical evidence for such effects, including recent meta-analyses and large-scale studies. We also show that attention patterns (indexed by entropy and other measures) in the Transformer show patterns of diffuse attention in the presence of similar distractors, consistent with cue-based retrieval models of parsing. But in contrast to these models, the attentional cues and memory representations are learned entirely from the simple self-supervised task of predicting the next word.
%R 10.18653/v1/2021.cmcl-1.6
%U https://aclanthology.org/2021.cmcl-1.6
%U https://doi.org/10.18653/v1/2021.cmcl-1.6
%P 61-71
Markdown (Informal)
[Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention](https://aclanthology.org/2021.cmcl-1.6) (Ryu & Lewis, CMCL 2021)
ACL
Soo Hyun Ryu and Richard Lewis. 2021. Accounting for Agreement Phenomena in Sentence Comprehension with Transformer Language Models: Effects of Similarity-based Interference on Surprisal and Attention. In Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics, pages 61–71, Online. Association for Computational Linguistics.