@inproceedings{krause-etal-2023-confidently,
title = "Confidently Wrong: Exploring the Calibration and Expression of (Un)Certainty of Large Language Models in a Multilingual Setting",
author = "Krause, Lea and
Tufa, Wondimagegnhue and
Baez Santamaria, Selene and
Daza, Angel and
Khurana, Urja and
Vossen, Piek",
editor = "Gatt, Albert and
Gardent, Claire and
Cripwell, Liam and
Belz, Anya and
Borg, Claudia and
Erdem, Aykut and
Erdem, Erkut",
booktitle = "Proceedings of the Workshop on Multimodal, Multilingual Natural Language Generation and Multilingual WebNLG Challenge (MM-NLG 2023)",
month = sep,
year = "2023",
address = "Prague, Czech Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.mmnlg-1.1",
pages = "1--9",
abstract = "While the fluency and coherence of Large Language Models (LLMs) in text generation have seen significant improvements, their competency in generating appropriate expressions of uncertainty remains limited.Using a multilingual closed-book QA task and GPT-3.5, we explore how well LLMs are calibrated and express certainty across a diverse set of languages, including low-resource settings. Our results reveal strong performance in high-resource languages but a marked decline in performance in lower-resource languages. Across all, we observe an exaggerated expression of confidence in the model, which does not align with the correctness or likelihood of its responses. Our findings highlight the need for further research into accurate calibration of LLMs especially in a multilingual setting.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krause-etal-2023-confidently">
<titleInfo>
<title>Confidently Wrong: Exploring the Calibration and Expression of (Un)Certainty of Large Language Models in a Multilingual Setting</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lea</namePart>
<namePart type="family">Krause</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wondimagegnhue</namePart>
<namePart type="family">Tufa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Selene</namePart>
<namePart type="family">Baez Santamaria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angel</namePart>
<namePart type="family">Daza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Urja</namePart>
<namePart type="family">Khurana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piek</namePart>
<namePart type="family">Vossen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Multimodal, Multilingual Natural Language Generation and Multilingual WebNLG Challenge (MM-NLG 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Gatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Gardent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liam</namePart>
<namePart type="family">Cripwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Borg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aykut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erkut</namePart>
<namePart type="family">Erdem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Prague, Czech Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While the fluency and coherence of Large Language Models (LLMs) in text generation have seen significant improvements, their competency in generating appropriate expressions of uncertainty remains limited.Using a multilingual closed-book QA task and GPT-3.5, we explore how well LLMs are calibrated and express certainty across a diverse set of languages, including low-resource settings. Our results reveal strong performance in high-resource languages but a marked decline in performance in lower-resource languages. Across all, we observe an exaggerated expression of confidence in the model, which does not align with the correctness or likelihood of its responses. Our findings highlight the need for further research into accurate calibration of LLMs especially in a multilingual setting.</abstract>
<identifier type="citekey">krause-etal-2023-confidently</identifier>
<location>
<url>https://aclanthology.org/2023.mmnlg-1.1</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Confidently Wrong: Exploring the Calibration and Expression of (Un)Certainty of Large Language Models in a Multilingual Setting
%A Krause, Lea
%A Tufa, Wondimagegnhue
%A Baez Santamaria, Selene
%A Daza, Angel
%A Khurana, Urja
%A Vossen, Piek
%Y Gatt, Albert
%Y Gardent, Claire
%Y Cripwell, Liam
%Y Belz, Anya
%Y Borg, Claudia
%Y Erdem, Aykut
%Y Erdem, Erkut
%S Proceedings of the Workshop on Multimodal, Multilingual Natural Language Generation and Multilingual WebNLG Challenge (MM-NLG 2023)
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czech Republic
%F krause-etal-2023-confidently
%X While the fluency and coherence of Large Language Models (LLMs) in text generation have seen significant improvements, their competency in generating appropriate expressions of uncertainty remains limited.Using a multilingual closed-book QA task and GPT-3.5, we explore how well LLMs are calibrated and express certainty across a diverse set of languages, including low-resource settings. Our results reveal strong performance in high-resource languages but a marked decline in performance in lower-resource languages. Across all, we observe an exaggerated expression of confidence in the model, which does not align with the correctness or likelihood of its responses. Our findings highlight the need for further research into accurate calibration of LLMs especially in a multilingual setting.
%U https://aclanthology.org/2023.mmnlg-1.1
%P 1-9
Markdown (Informal)
[Confidently Wrong: Exploring the Calibration and Expression of (Un)Certainty of Large Language Models in a Multilingual Setting](https://aclanthology.org/2023.mmnlg-1.1) (Krause et al., MMNLG-WS 2023)
ACL