@inproceedings{truong-etal-2023-language,
title = "Language models are not naysayers: an analysis of language models on negation benchmarks",
author = "Truong, Thinh Hung and
Baldwin, Timothy and
Verspoor, Karin and
Cohn, Trevor",
editor = "Palmer, Alexis and
Camacho-collados, Jose",
booktitle = "Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.starsem-1.10",
doi = "10.18653/v1/2023.starsem-1.10",
pages = "101--114",
abstract = "Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models ({``}LLMs{''}) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundamental linguistic phenomenon that is central to language understanding. We evaluate different LLMs - including the open-source GPT-neo, GPT-3, and InstructGPT - against a wide range of negation benchmarks. Through systematic experimentation with varying model sizes and prompts, we show that LLMs have several limitations including insensitivity to the presence of negation, an inability to capture the lexical semantics of negation, and a failure to reason under negation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="truong-etal-2023-language">
<titleInfo>
<title>Language models are not naysayers: an analysis of language models on negation benchmarks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thinh</namePart>
<namePart type="given">Hung</namePart>
<namePart type="family">Truong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">Verspoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models (“LLMs”) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundamental linguistic phenomenon that is central to language understanding. We evaluate different LLMs - including the open-source GPT-neo, GPT-3, and InstructGPT - against a wide range of negation benchmarks. Through systematic experimentation with varying model sizes and prompts, we show that LLMs have several limitations including insensitivity to the presence of negation, an inability to capture the lexical semantics of negation, and a failure to reason under negation.</abstract>
<identifier type="citekey">truong-etal-2023-language</identifier>
<identifier type="doi">10.18653/v1/2023.starsem-1.10</identifier>
<location>
<url>https://aclanthology.org/2023.starsem-1.10</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>101</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language models are not naysayers: an analysis of language models on negation benchmarks
%A Truong, Thinh Hung
%A Baldwin, Timothy
%A Verspoor, Karin
%A Cohn, Trevor
%Y Palmer, Alexis
%Y Camacho-collados, Jose
%S Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F truong-etal-2023-language
%X Negation has been shown to be a major bottleneck for masked language models, such as BERT. However, whether this finding still holds for larger-sized auto-regressive language models (“LLMs”) has not been studied comprehensively. With the ever-increasing volume of research and applications of LLMs, we take a step back to evaluate the ability of current-generation LLMs to handle negation, a fundamental linguistic phenomenon that is central to language understanding. We evaluate different LLMs - including the open-source GPT-neo, GPT-3, and InstructGPT - against a wide range of negation benchmarks. Through systematic experimentation with varying model sizes and prompts, we show that LLMs have several limitations including insensitivity to the presence of negation, an inability to capture the lexical semantics of negation, and a failure to reason under negation.
%R 10.18653/v1/2023.starsem-1.10
%U https://aclanthology.org/2023.starsem-1.10
%U https://doi.org/10.18653/v1/2023.starsem-1.10
%P 101-114
Markdown (Informal)
[Language models are not naysayers: an analysis of language models on negation benchmarks](https://aclanthology.org/2023.starsem-1.10) (Truong et al., *SEM 2023)
ACL