@inproceedings{sileo-moens-2023-probing,
    title     = "Probing neural language models for understanding of words of estimative probability",
    author    = "Sileo, Damien  and
                 Moens, Marie-Francine",
    editor    = "Palmer, Alexis  and
                 Camacho-Collados, Jose",
    booktitle = "Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*{SEM} 2023)",
    month     = jul,
    year      = "2023",
    address   = "Toronto, Canada",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2023.starsem-1.41",
    doi       = "10.18653/v1/2023.starsem-1.41",
    pages     = "469--476",
    abstract  = "Words of Estimative Probability (WEP) are phrases used to express the plausibility of a statement. Examples include terms like \textit{probably, maybe, likely, doubt, unlikely}, and \textit{impossible}. Surveys have shown that human evaluators tend to agree when assigning numerical probability levels to these WEPs. For instance, the term \textit{highly likely} equates to a median probability of $0.90{\pm}0.08$ according to a survey by \citet{fagen-ulmschneider}. In this study, our focus is to gauge the competency of neural language processing models in accurately capturing the consensual probability level associated with each WEP. Our first approach is utilizing the UNLI dataset \cite{chen-etal-2020-uncertain}, which links premises and hypotheses with their perceived joint probability $p$. From this, we craft prompts in the form: ``[\textsc{Premise}]. [\textsc{Wep}], [\textsc{Hypothesis}].'' This allows us to evaluate whether language models can predict if the consensual probability level of a WEP aligns closely with $p$. In our second approach, we develop a dataset based on WEP-focused probabilistic reasoning to assess if language models can logically process WEP compositions. For example, given the prompt ``[\textsc{EventA}] \textit{is likely}. [\textsc{EventB}] \textit{is impossible}.'', a well-functioning language model should not conclude that [\textsc{EventA$\&$B}] is likely. Through our study, we observe that both tasks present challenges to out-of-the-box English language models. However, we also demonstrate that fine-tuning these models can lead to significant and transferable improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sileo-moens-2023-probing">
<titleInfo>
<title>Probing neural language models for understanding of words of estimative probability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Damien</namePart>
<namePart type="family">Sileo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Words of Estimative Probability (WEP) are phrases used to express the plausibility of a statement. Examples include terms like probably, maybe, likely, doubt, unlikely, and impossible. Surveys have shown that human evaluators tend to agree when assigning numerical probability levels to these WEPs. For instance, the term highly likely equates to a median probability of 0.90±0.08 according to a survey by Fagen-Ulmschneider. In this study, our focus is to gauge the competency of neural language processing models in accurately capturing the consensual probability level associated with each WEP. Our first approach is utilizing the UNLI dataset (Chen et al., 2020), which links premises and hypotheses with their perceived joint probability p. From this, we craft prompts in the form: “[Premise]. [Wep], [Hypothesis].” This allows us to evaluate whether language models can predict if the consensual probability level of a WEP aligns closely with p. In our second approach, we develop a dataset based on WEP-focused probabilistic reasoning to assess if language models can logically process WEP compositions. For example, given the prompt “[EventA] is likely. [EventB] is impossible.”, a well-functioning language model should not conclude that [EventA &amp; B] is likely. Through our study, we observe that both tasks present challenges to out-of-the-box English language models. However, we also demonstrate that fine-tuning these models can lead to significant and transferable improvements.</abstract>
<identifier type="citekey">sileo-moens-2023-probing</identifier>
<identifier type="doi">10.18653/v1/2023.starsem-1.41</identifier>
<location>
<url>https://aclanthology.org/2023.starsem-1.41</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>469</start>
<end>476</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Probing neural language models for understanding of words of estimative probability
%A Sileo, Damien
%A Moens, Marie-Francine
%Y Palmer, Alexis
%Y Camacho-Collados, Jose
%S Proceedings of the 12th Joint Conference on Lexical and Computational Semantics (*SEM 2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F sileo-moens-2023-probing
%X Words of Estimative Probability (WEP) are phrases used to express the plausibility of a statement. Examples include terms like probably, maybe, likely, doubt, unlikely, and impossible. Surveys have shown that human evaluators tend to agree when assigning numerical probability levels to these WEPs. For instance, the term highly likely equates to a median probability of 0.90±0.08 according to a survey by Fagen-Ulmschneider. In this study, our focus is to gauge the competency of neural language processing models in accurately capturing the consensual probability level associated with each WEP. Our first approach is utilizing the UNLI dataset (Chen et al., 2020), which links premises and hypotheses with their perceived joint probability p. From this, we craft prompts in the form: “[Premise]. [Wep], [Hypothesis].” This allows us to evaluate whether language models can predict if the consensual probability level of a WEP aligns closely with p. In our second approach, we develop a dataset based on WEP-focused probabilistic reasoning to assess if language models can logically process WEP compositions. For example, given the prompt “[EventA] is likely. [EventB] is impossible.”, a well-functioning language model should not conclude that [EventA & B] is likely. Through our study, we observe that both tasks present challenges to out-of-the-box English language models. However, we also demonstrate that fine-tuning these models can lead to significant and transferable improvements.
%R 10.18653/v1/2023.starsem-1.41
%U https://aclanthology.org/2023.starsem-1.41
%U https://doi.org/10.18653/v1/2023.starsem-1.41
%P 469-476
Markdown (Informal)
[Probing neural language models for understanding of words of estimative probability](https://aclanthology.org/2023.starsem-1.41) (Sileo & Moens, *SEM 2023)
ACL