@article{allaway-etal-2024-exceptions,
title = "Exceptions, Instantiations, and Overgeneralization: Insights into How Language Models Process Generics",
author = "Allaway, Emily and
Bhagavatula, Chandra and
Hwang, Jena D. and
McKeown, Kathleen and
Leslie, Sarah-Jane",
journal = "Computational Linguistics",
volume = "50",
number = "3",
month = dec,
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2024.cl-4.2/",
doi = "10.1162/coli_a_00530",
pages = "1211--1275",
abstract = "Large language models (LLMs) have garnered a great deal of attention for their exceptional generative performance on commonsense and reasoning tasks. In this work, we investigate LLMs' capabilities for generalization using a particularly challenging type of statement: generics. Generics express generalizations (e.g., birds can fly) but do so without explicit quantification. They are notable because they generalize over their instantiations (e.g., sparrows can fly) yet hold true even in the presence of exceptions (e.g., penguins do not). For humans, these generic generalizations play a fundamental role in cognition, concept acquisition, and intuitive reasoning. We investigate how LLMs respond to and reason about generics. To this end, we first propose a framework grounded in pragmatics to automatically generate both exceptions and instantiations {--} collectively exemplars. We make use of focus{---}a pragmatic phenomenon that highlights meaning-bearing elements in a sentence{---}to capture the full range of interpretations of generics across different contexts of use. This allows us to derive precise logical definitions for exemplars and operationalize them to automatically generate exemplars from LLMs. Using our system, we generate a dataset of {\ensuremath{\sim}}370kexemplars across {\ensuremath{\sim}}17k generics and conduct a human validation of a sample of the generated data. We use our final generated dataset to investigate how LLMs reason about generics. Humans have a documented tendency to conflate universally quantified statements (e.g., all birds can fly) with generics. Therefore, we probe whether LLMs exhibit similar overgeneralization behavior in terms of quantification and in property inheritance. We find that LLMs do show evidence of overgeneralization, although they sometimes struggle to reason about exceptions. Furthermore, we find that LLMs may exhibit similar non-logical behavior to humans when considering property inheritance from generics."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="allaway-etal-2024-exceptions">
<titleInfo>
<title>Exceptions, Instantiations, and Overgeneralization: Insights into How Language Models Process Generics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Allaway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chandra</namePart>
<namePart type="family">Bhagavatula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jena</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathleen</namePart>
<namePart type="family">McKeown</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah-Jane</namePart>
<namePart type="family">Leslie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Large language models (LLMs) have garnered a great deal of attention for their exceptional generative performance on commonsense and reasoning tasks. In this work, we investigate LLMs’ capabilities for generalization using a particularly challenging type of statement: generics. Generics express generalizations (e.g., birds can fly) but do so without explicit quantification. They are notable because they generalize over their instantiations (e.g., sparrows can fly) yet hold true even in the presence of exceptions (e.g., penguins do not). For humans, these generic generalizations play a fundamental role in cognition, concept acquisition, and intuitive reasoning. We investigate how LLMs respond to and reason about generics. To this end, we first propose a framework grounded in pragmatics to automatically generate both exceptions and instantiations – collectively exemplars. We make use of focus—a pragmatic phenomenon that highlights meaning-bearing elements in a sentence—to capture the full range of interpretations of generics across different contexts of use. This allows us to derive precise logical definitions for exemplars and operationalize them to automatically generate exemplars from LLMs. Using our system, we generate a dataset of \ensuremath\sim370kexemplars across \ensuremath\sim17k generics and conduct a human validation of a sample of the generated data. We use our final generated dataset to investigate how LLMs reason about generics. Humans have a documented tendency to conflate universally quantified statements (e.g., all birds can fly) with generics. Therefore, we probe whether LLMs exhibit similar overgeneralization behavior in terms of quantification and in property inheritance. We find that LLMs do show evidence of overgeneralization, although they sometimes struggle to reason about exceptions. Furthermore, we find that LLMs may exhibit similar non-logical behavior to humans when considering property inheritance from generics.</abstract>
<identifier type="citekey">allaway-etal-2024-exceptions</identifier>
<identifier type="doi">10.1162/coli_a_00530</identifier>
<location>
<url>https://aclanthology.org/2024.cl-4.2/</url>
</location>
<part>
<date>2024-12</date>
<detail type="volume"><number>50</number></detail>
<detail type="issue"><number>3</number></detail>
<extent unit="page">
<start>1211</start>
<end>1275</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Exceptions, Instantiations, and Overgeneralization: Insights into How Language Models Process Generics
%A Allaway, Emily
%A Bhagavatula, Chandra
%A Hwang, Jena D.
%A McKeown, Kathleen
%A Leslie, Sarah-Jane
%J Computational Linguistics
%D 2024
%8 December
%V 50
%N 3
%I MIT Press
%C Cambridge, MA
%F allaway-etal-2024-exceptions
%X Large language models (LLMs) have garnered a great deal of attention for their exceptional generative performance on commonsense and reasoning tasks. In this work, we investigate LLMs’ capabilities for generalization using a particularly challenging type of statement: generics. Generics express generalizations (e.g., birds can fly) but do so without explicit quantification. They are notable because they generalize over their instantiations (e.g., sparrows can fly) yet hold true even in the presence of exceptions (e.g., penguins do not). For humans, these generic generalizations play a fundamental role in cognition, concept acquisition, and intuitive reasoning. We investigate how LLMs respond to and reason about generics. To this end, we first propose a framework grounded in pragmatics to automatically generate both exceptions and instantiations – collectively exemplars. We make use of focus—a pragmatic phenomenon that highlights meaning-bearing elements in a sentence—to capture the full range of interpretations of generics across different contexts of use. This allows us to derive precise logical definitions for exemplars and operationalize them to automatically generate exemplars from LLMs. Using our system, we generate a dataset of \ensuremath\sim370kexemplars across \ensuremath\sim17k generics and conduct a human validation of a sample of the generated data. We use our final generated dataset to investigate how LLMs reason about generics. Humans have a documented tendency to conflate universally quantified statements (e.g., all birds can fly) with generics. Therefore, we probe whether LLMs exhibit similar overgeneralization behavior in terms of quantification and in property inheritance. We find that LLMs do show evidence of overgeneralization, although they sometimes struggle to reason about exceptions. Furthermore, we find that LLMs may exhibit similar non-logical behavior to humans when considering property inheritance from generics.
%R 10.1162/coli_a_00530
%U https://aclanthology.org/2024.cl-4.2/
%U https://doi.org/10.1162/coli_a_00530
%P 1211-1275
Markdown (Informal)
[Exceptions, Instantiations, and Overgeneralization: Insights into How Language Models Process Generics](https://aclanthology.org/2024.cl-4.2/) (Allaway et al., CL 2024)
ACL