@inproceedings{malberg-etal-2025-comprehensive,
title = "A Comprehensive Evaluation of Cognitive Biases in {LLM}s",
author = "Malberg, Simon and
Poletukhin, Roman and
Schuster, Carolin M. and
Groh, Georg",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Bizzoni, Yuri and
Miyagawa, So and
Alnajjar, Khalid},
booktitle = "Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities",
month = may,
year = "2025",
address = "Albuquerque, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nlp4dh-1.50/",
doi = "10.18653/v1/2025.nlp4dh-1.50",
pages = "578--613",
ISBN = "979-8-89176-234-3",
abstract = "We present a large-scale evaluation of 30 cognitive biases in 20 state-of-the-art large language models (LLMs) under various decision-making scenarios. Our contributions include a novel general-purpose test framework for reliable and large-scale generation of tests for LLMs, a benchmark dataset with 30,000 tests for detecting cognitive biases in LLMs, and a comprehensive assessment of the biases found in the 20 evaluated LLMs. Our work confirms and broadens previous findings suggesting the presence of cognitive biases in LLMs by reporting evidence of all 30 tested biases in at least some of the 20 LLMs. We publish our framework code and dataset to encourage future research on cognitive biases in LLMs: https://github.com/simonmalberg/cognitive-biases-in-llms."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="malberg-etal-2025-comprehensive">
<titleInfo>
<title>A Comprehensive Evaluation of Cognitive Biases in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Malberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Poletukhin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolin</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Schuster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Groh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-234-3</identifier>
</relatedItem>
<abstract>We present a large-scale evaluation of 30 cognitive biases in 20 state-of-the-art large language models (LLMs) under various decision-making scenarios. Our contributions include a novel general-purpose test framework for reliable and large-scale generation of tests for LLMs, a benchmark dataset with 30,000 tests for detecting cognitive biases in LLMs, and a comprehensive assessment of the biases found in the 20 evaluated LLMs. Our work confirms and broadens previous findings suggesting the presence of cognitive biases in LLMs by reporting evidence of all 30 tested biases in at least some of the 20 LLMs. We publish our framework code and dataset to encourage future research on cognitive biases in LLMs: https://github.com/simonmalberg/cognitive-biases-in-llms.</abstract>
<identifier type="citekey">malberg-etal-2025-comprehensive</identifier>
<identifier type="doi">10.18653/v1/2025.nlp4dh-1.50</identifier>
<location>
<url>https://aclanthology.org/2025.nlp4dh-1.50/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>578</start>
<end>613</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Comprehensive Evaluation of Cognitive Biases in LLMs
%A Malberg, Simon
%A Poletukhin, Roman
%A Schuster, Carolin M.
%A Groh, Georg
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Bizzoni, Yuri
%Y Miyagawa, So
%Y Alnajjar, Khalid
%S Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, USA
%@ 979-8-89176-234-3
%F malberg-etal-2025-comprehensive
%X We present a large-scale evaluation of 30 cognitive biases in 20 state-of-the-art large language models (LLMs) under various decision-making scenarios. Our contributions include a novel general-purpose test framework for reliable and large-scale generation of tests for LLMs, a benchmark dataset with 30,000 tests for detecting cognitive biases in LLMs, and a comprehensive assessment of the biases found in the 20 evaluated LLMs. Our work confirms and broadens previous findings suggesting the presence of cognitive biases in LLMs by reporting evidence of all 30 tested biases in at least some of the 20 LLMs. We publish our framework code and dataset to encourage future research on cognitive biases in LLMs: https://github.com/simonmalberg/cognitive-biases-in-llms.
%R 10.18653/v1/2025.nlp4dh-1.50
%U https://aclanthology.org/2025.nlp4dh-1.50/
%U https://doi.org/10.18653/v1/2025.nlp4dh-1.50
%P 578-613
Markdown (Informal)
[A Comprehensive Evaluation of Cognitive Biases in LLMs](https://aclanthology.org/2025.nlp4dh-1.50/) (Malberg et al., NLP4DH 2025)
ACL
- Simon Malberg, Roman Poletukhin, Carolin M. Schuster, and Georg Groh. 2025. A Comprehensive Evaluation of Cognitive Biases in LLMs. In Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities, pages 578–613, Albuquerque, USA. Association for Computational Linguistics.