@inproceedings{cong-2022-psycholinguistic,
title = "Psycholinguistic Diagnosis of Language Models{'} Commonsense Reasoning",
author = "Cong, Yan",
editor = "Bosselut, Antoine and
Li, Xiang and
Lin, Bill Yuchen and
Shwartz, Vered and
Majumder, Bodhisattwa Prasad and
Lal, Yash Kumar and
Rudinger, Rachel and
Ren, Xiang and
Tandon, Niket and
Zouhar, Vil{\'e}m",
booktitle = "Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.csrr-1.3",
doi = "10.18653/v1/2022.csrr-1.3",
pages = "17--22",
abstract = "Neural language models have attracted a lot of attention in the past few years. More and more researchers are getting intrigued by how language models encode commonsense, specifically what kind of commonsense they understand, and why they do. This paper analyzed neural language models{'} understanding of commonsense pragmatics (i.e., implied meanings) through human behavioral and neurophysiological data. These psycholinguistic tests are designed to draw conclusions based on predictive responses in context, making them very well suited to test word-prediction models such as BERT in natural settings. They can provide the appropriate prompts and tasks to answer questions about linguistic mechanisms underlying predictive responses. This paper adopted psycholinguistic datasets to probe language models{'} commonsense reasoning. Findings suggest that GPT-3{'}s performance was mostly at chance in the psycholinguistic tasks. We also showed that DistillBERT had some understanding of the (implied) intent that{'}s shared among most people. Such intent is implicitly reflected in the usage of conversational implicatures and presuppositions. Whether or not fine-tuning improved its performance to human-level depends on the type of commonsense reasoning.",
}
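As a rough, illustrative sketch of the word-prediction probing described in the abstract above (not the paper's actual code or data), the following Python snippet compares DistilBERT's predicted probabilities for candidate completions at a masked position in a cloze-style item. It assumes the Hugging Face transformers library and the distilbert-base-uncased checkpoint; the example sentence and candidate words are invented for illustration, not items from the paper's psycholinguistic datasets.

# Illustrative sketch only: score candidate words at a masked position with
# DistilBERT's masked-language-model head, in the spirit of cloze-style
# psycholinguistic probing. The sentence and candidates are invented examples.
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("distilbert-base-uncased")
model.eval()

# Cloze-style context with one masked position (scalar-implicature flavored).
context = f"Some of the students passed the exam, so {tokenizer.mask_token} of them failed."
candidates = ["some", "all", "none"]

inputs = tokenizer(context, return_tensors="pt")
mask_pos = (inputs["input_ids"][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]

with torch.no_grad():
    logits = model(**inputs).logits

# Probability distribution over the vocabulary at the masked position.
probs = torch.softmax(logits[0, mask_pos], dim=-1).squeeze(0)

for word in candidates:
    token_id = tokenizer.convert_tokens_to_ids(word)
    print(f"P({word!r} | context) = {probs[token_id].item():.4f}")

A relatively higher probability for "some" than for "all" in frames like this would be one very rough signal of sensitivity to the implicature-related contrasts the abstract discusses; the paper itself relies on curated psycholinguistic datasets rather than ad-hoc sentences like this one.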
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cong-2022-psycholinguistic">
<titleInfo>
<title>Psycholinguistic Diagnosis of Language Models’ Commonsense Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Cong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bill</namePart>
<namePart type="given">Yuchen</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bodhisattwa</namePart>
<namePart type="given">Prasad</namePart>
<namePart type="family">Majumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yash</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Lal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Rudinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niket</namePart>
<namePart type="family">Tandon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vilém</namePart>
<namePart type="family">Zouhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural language models have attracted a lot of attention in the past few years. More and more researchers are getting intrigued by how language models encode commonsense, specifically what kind of commonsense they understand, and why they do. This paper analyzed neural language models’ understanding of commonsense pragmatics (i.e., implied meanings) through human behavioral and neurophysiological data. These psycholinguistic tests are designed to draw conclusions based on predictive responses in context, making them very well suited to test word-prediction models such as BERT in natural settings. They can provide the appropriate prompts and tasks to answer questions about linguistic mechanisms underlying predictive responses. This paper adopted psycholinguistic datasets to probe language models’ commonsense reasoning. Findings suggest that GPT-3’s performance was mostly at chance in the psycholinguistic tasks. We also showed that DistillBERT had some understanding of the (implied) intent that’s shared among most people. Such intent is implicitly reflected in the usage of conversational implicatures and presuppositions. Whether or not fine-tuning improved its performance to human-level depends on the type of commonsense reasoning.</abstract>
<identifier type="citekey">cong-2022-psycholinguistic</identifier>
<identifier type="doi">10.18653/v1/2022.csrr-1.3</identifier>
<location>
<url>https://aclanthology.org/2022.csrr-1.3</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>17</start>
<end>22</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Psycholinguistic Diagnosis of Language Models’ Commonsense Reasoning
%A Cong, Yan
%Y Bosselut, Antoine
%Y Li, Xiang
%Y Lin, Bill Yuchen
%Y Shwartz, Vered
%Y Majumder, Bodhisattwa Prasad
%Y Lal, Yash Kumar
%Y Rudinger, Rachel
%Y Ren, Xiang
%Y Tandon, Niket
%Y Zouhar, Vilém
%S Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F cong-2022-psycholinguistic
%X Neural language models have attracted a lot of attention in the past few years. More and more researchers are getting intrigued by how language models encode commonsense, specifically what kind of commonsense they understand, and why they do. This paper analyzed neural language models’ understanding of commonsense pragmatics (i.e., implied meanings) through human behavioral and neurophysiological data. These psycholinguistic tests are designed to draw conclusions based on predictive responses in context, making them very well suited to test word-prediction models such as BERT in natural settings. They can provide the appropriate prompts and tasks to answer questions about linguistic mechanisms underlying predictive responses. This paper adopted psycholinguistic datasets to probe language models’ commonsense reasoning. Findings suggest that GPT-3’s performance was mostly at chance in the psycholinguistic tasks. We also showed that DistillBERT had some understanding of the (implied) intent that’s shared among most people. Such intent is implicitly reflected in the usage of conversational implicatures and presuppositions. Whether or not fine-tuning improved its performance to human-level depends on the type of commonsense reasoning.
%R 10.18653/v1/2022.csrr-1.3
%U https://aclanthology.org/2022.csrr-1.3
%U https://doi.org/10.18653/v1/2022.csrr-1.3
%P 17-22
Markdown (Informal)
[Psycholinguistic Diagnosis of Language Models’ Commonsense Reasoning](https://aclanthology.org/2022.csrr-1.3) (Cong, CSRR 2022)
ACL
Yan Cong. 2022. Psycholinguistic Diagnosis of Language Models’ Commonsense Reasoning. In Proceedings of the First Workshop on Commonsense Representation and Reasoning (CSRR 2022), pages 17–22, Dublin, Ireland. Association for Computational Linguistics.