@inproceedings{wildenburg-etal-2024-pre,
title = "Do Pre-Trained Language Models Detect and Understand Semantic Underspecification? Ask the {DUST}!",
author = "Wildenburg, Frank and
Hanna, Michael and
Pezzelle, Sandro",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.572/",
doi = "10.18653/v1/2024.findings-acl.572",
pages = "9598--9613",
    abstract = "In everyday language use, speakers frequently utter and interpret sentences that are semantically underspecified, namely, whose content is insufficient to fully convey their message or interpret them univocally. For example, to interpret the underspecified sentence {\textquotedblleft}Don't spend too much{\textquotedblright}, which leaves implicit what (not) to spend, additional linguistic context or outside knowledge is needed. In this work, we propose a novel Dataset of semantically Underspecified Sentences grouped by Type (DUST) and use it to study whether pre-trained language models (LMs) correctly identify and interpret underspecified sentences. We find that newer LMs are reasonably able to identify underspecified sentences when explicitly prompted. However, interpreting them correctly is much harder for all LMs. Our experiments show that when interpreting underspecified sentences, LMs exhibit little uncertainty, contrary to what theoretical accounts of underspecification would predict. Overall, our study reveals limitations in current models' processing of sentence semantics and highlights the importance of using naturalistic data and communicative scenarios when evaluating LMs' language capabilities."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wildenburg-etal-2024-pre">
    <titleInfo>
      <title>Do Pre-Trained Language Models Detect and Understand Semantic Underspecification? Ask the DUST!</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Frank</namePart>
      <namePart type="family">Wildenburg</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Hanna</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sandro</namePart>
      <namePart type="family">Pezzelle</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andre</namePart>
        <namePart type="family">Martins</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Srikumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In everyday language use, speakers frequently utter and interpret sentences that are semantically underspecified, namely, whose content is insufficient to fully convey their message or interpret them univocally. For example, to interpret the underspecified sentence “Don’t spend too much”, which leaves implicit what (not) to spend, additional linguistic context or outside knowledge is needed. In this work, we propose a novel Dataset of semantically Underspecified Sentences grouped by Type (DUST) and use it to study whether pre-trained language models (LMs) correctly identify and interpret underspecified sentences. We find that newer LMs are reasonably able to identify underspecified sentences when explicitly prompted. However, interpreting them correctly is much harder for all LMs. Our experiments show that when interpreting underspecified sentences, LMs exhibit little uncertainty, contrary to what theoretical accounts of underspecification would predict. Overall, our study reveals limitations in current models’ processing of sentence semantics and highlights the importance of using naturalistic data and communicative scenarios when evaluating LMs’ language capabilities.</abstract>
    <identifier type="citekey">wildenburg-etal-2024-pre</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-acl.572</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-acl.572/</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>9598</start>
        <end>9613</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Do Pre-Trained Language Models Detect and Understand Semantic Underspecification? Ask the DUST!
%A Wildenburg, Frank
%A Hanna, Michael
%A Pezzelle, Sandro
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F wildenburg-etal-2024-pre
%X In everyday language use, speakers frequently utter and interpret sentences that are semantically underspecified, namely, whose content is insufficient to fully convey their message or interpret them univocally. For example, to interpret the underspecified sentence “Don’t spend too much”, which leaves implicit what (not) to spend, additional linguistic context or outside knowledge is needed. In this work, we propose a novel Dataset of semantically Underspecified Sentences grouped by Type (DUST) and use it to study whether pre-trained language models (LMs) correctly identify and interpret underspecified sentences. We find that newer LMs are reasonably able to identify underspecified sentences when explicitly prompted. However, interpreting them correctly is much harder for all LMs. Our experiments show that when interpreting underspecified sentences, LMs exhibit little uncertainty, contrary to what theoretical accounts of underspecification would predict. Overall, our study reveals limitations in current models’ processing of sentence semantics and highlights the importance of using naturalistic data and communicative scenarios when evaluating LMs’ language capabilities.
%R 10.18653/v1/2024.findings-acl.572
%U https://aclanthology.org/2024.findings-acl.572/
%U https://doi.org/10.18653/v1/2024.findings-acl.572
%P 9598-9613
Markdown (Informal)
[Do Pre-Trained Language Models Detect and Understand Semantic Underspecification? Ask the DUST!](https://aclanthology.org/2024.findings-acl.572/) (Wildenburg et al., Findings 2024)
ACL
Frank Wildenburg, Michael Hanna, and Sandro Pezzelle. 2024. [Do Pre-Trained Language Models Detect and Understand Semantic Underspecification? Ask the DUST!](https://aclanthology.org/2024.findings-acl.572/). In *Findings of the Association for Computational Linguistics: ACL 2024*, pages 9598–9613, Bangkok, Thailand. Association for Computational Linguistics.