@inproceedings{sieker-etal-2023-beyond,
    title = "Beyond the Bias: Unveiling the Quality of Implicit Causality Prompt Continuations in Language Models",
    author = "Sieker, Judith and
      Bott, Oliver and
      Solstad, Torgrim and
      Zarrie{\ss}, Sina",
    editor = "Keet, C. Maria and
      Lee, Hung-Yi and
      Zarrie{\ss}, Sina",
    booktitle = "Proceedings of the 16th International Natural Language Generation Conference",
    month = sep,
    year = "2023",
    address = "Prague, Czechia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.inlg-main.15/",
    doi = "10.18653/v1/2023.inlg-main.15",
    pages = "206--220",
    abstract = "Recent studies have used human continuations of Implicit Causality (IC) prompts collected in linguistic experiments to evaluate discourse understanding in large language models (LLMs), focusing on the well-known IC coreference bias in the LLMs' predictions of the next word following the prompt. In this study, we investigate how continuations of IC prompts can be used to evaluate the text generation capabilities of LLMs in a linguistically controlled setting. We conduct an experiment using two open-source GPT-based models, employing human evaluation to assess different aspects of continuation quality. Our findings show that LLMs struggle in particular with generating coherent continuations in this rather simple setting, indicating a lack of discourse knowledge beyond the well-known IC bias. Our results also suggest that a bias congruent continuation does not necessarily equate to a higher continuation quality. Furthermore, our study draws upon insights from the Uniform Information Density hypothesis, testing different prompt modifications and decoding procedures and showing that sampling-based methods are particularly sensitive to the information density of the prompts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sieker-etal-2023-beyond">
    <titleInfo>
        <title>Beyond the Bias: Unveiling the Quality of Implicit Causality Prompt Continuations in Language Models</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Judith</namePart>
        <namePart type="family">Sieker</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Oliver</namePart>
        <namePart type="family">Bott</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Torgrim</namePart>
        <namePart type="family">Solstad</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sina</namePart>
        <namePart type="family">Zarrieß</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 16th International Natural Language Generation Conference</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">C</namePart>
            <namePart type="given">Maria</namePart>
            <namePart type="family">Keet</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Hung-Yi</namePart>
            <namePart type="family">Lee</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Sina</namePart>
            <namePart type="family">Zarrieß</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Prague, Czechia</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent studies have used human continuations of Implicit Causality (IC) prompts collected in linguistic experiments to evaluate discourse understanding in large language models (LLMs), focusing on the well-known IC coreference bias in the LLMs’ predictions of the next word following the prompt. In this study, we investigate how continuations of IC prompts can be used to evaluate the text generation capabilities of LLMs in a linguistically controlled setting. We conduct an experiment using two open-source GPT-based models, employing human evaluation to assess different aspects of continuation quality. Our findings show that LLMs struggle in particular with generating coherent continuations in this rather simple setting, indicating a lack of discourse knowledge beyond the well-known IC bias. Our results also suggest that a bias congruent continuation does not necessarily equate to a higher continuation quality. Furthermore, our study draws upon insights from the Uniform Information Density hypothesis, testing different prompt modifications and decoding procedures and showing that sampling-based methods are particularly sensitive to the information density of the prompts.</abstract>
    <identifier type="citekey">sieker-etal-2023-beyond</identifier>
    <identifier type="doi">10.18653/v1/2023.inlg-main.15</identifier>
    <location>
        <url>https://aclanthology.org/2023.inlg-main.15/</url>
    </location>
    <part>
        <date>2023-09</date>
        <extent unit="page">
            <start>206</start>
            <end>220</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond the Bias: Unveiling the Quality of Implicit Causality Prompt Continuations in Language Models
%A Sieker, Judith
%A Bott, Oliver
%A Solstad, Torgrim
%A Zarrieß, Sina
%Y Keet, C. Maria
%Y Lee, Hung-Yi
%Y Zarrieß, Sina
%S Proceedings of the 16th International Natural Language Generation Conference
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F sieker-etal-2023-beyond
%X Recent studies have used human continuations of Implicit Causality (IC) prompts collected in linguistic experiments to evaluate discourse understanding in large language models (LLMs), focusing on the well-known IC coreference bias in the LLMs’ predictions of the next word following the prompt. In this study, we investigate how continuations of IC prompts can be used to evaluate the text generation capabilities of LLMs in a linguistically controlled setting. We conduct an experiment using two open-source GPT-based models, employing human evaluation to assess different aspects of continuation quality. Our findings show that LLMs struggle in particular with generating coherent continuations in this rather simple setting, indicating a lack of discourse knowledge beyond the well-known IC bias. Our results also suggest that a bias congruent continuation does not necessarily equate to a higher continuation quality. Furthermore, our study draws upon insights from the Uniform Information Density hypothesis, testing different prompt modifications and decoding procedures and showing that sampling-based methods are particularly sensitive to the information density of the prompts.
%R 10.18653/v1/2023.inlg-main.15
%U https://aclanthology.org/2023.inlg-main.15/
%U https://doi.org/10.18653/v1/2023.inlg-main.15
%P 206-220
Markdown (Informal)
[Beyond the Bias: Unveiling the Quality of Implicit Causality Prompt Continuations in Language Models](https://aclanthology.org/2023.inlg-main.15/) (Sieker et al., INLG-SIGDIAL 2023)
ACL
Judith Sieker, Oliver Bott, Torgrim Solstad, and Sina Zarrieß. 2023. [Beyond the Bias: Unveiling the Quality of Implicit Causality Prompt Continuations in Language Models](https://aclanthology.org/2023.inlg-main.15/). In *Proceedings of the 16th International Natural Language Generation Conference*, pages 206–220, Prague, Czechia. Association for Computational Linguistics.