% Conference-paper record for the MBTI personality detection dataset paper.
% `editor` lists the editors of the proceedings volume named in `booktitle`.
% The compound family name "Di Eugenio" is written in `Last, First` form so
% that BibTeX keeps both words in the surname instead of the given name.
@inproceedings{li-etal-2025-large,
    title     = {Can Large Language Models Understand You Better? An {MBTI} Personality Detection Dataset Aligned with Population Traits},
    author    = {Li, Bohan and
                 Guan, Jiannan and
                 Dou, Longxu and
                 Feng, Yunlong and
                 Wang, Dingzirui and
                 Xu, Yang and
                 Wang, Enbo and
                 Chen, Qiguang and
                 Wang, Bichen and
                 Xu, Xiao and
                 Zhang, Yimeng and
                 Qin, Libo and
                 Zhao, Yanyan and
                 Zhu, Qingfu and
                 Che, Wanxiang},
    editor    = {Rambow, Owen and
                 Wanner, Leo and
                 Apidianaki, Marianna and
                 Al-Khalifa, Hend and
                 Di Eugenio, Barbara and
                 Schockaert, Steven},
    booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi, UAE},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.coling-main.339/},
    pages     = {5071--5081},
    abstract  = {The Myers-Briggs Type Indicator (MBTI) is one of the most influential personality theories reflecting individual differences in thinking, feeling, and behaving. MBTI personality detection has garnered considerable research interest and has evolved significantly over the years. However, this task tends to be overly optimistic, as it currently does not align well with the natural distribution of population personality traits. Specifically, the self-reported labels in existing datasets result in data quality issues and the hard labels fail to capture the full range of population personality distributions. In this paper, we identify the task by constructing MBTIBench, the first manually annotated MBTI personality detection dataset with soft labels, under the guidance of psychologists. Our experimental results confirm that soft labels can provide more benefits to other psychological tasks than hard labels. We highlight the polarized predictions and biases in LLMs as key directions for future research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS record for the paper with citekey li-etal-2025-large.
  Top-level <name> elements are the paper's authors; the <relatedItem type="host">
  element describes the proceedings volume, including its editors, publisher and place.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-etal-2025-large">
    <titleInfo>
      <title>Can Large Language Models Understand You Better? An MBTI Personality Detection Dataset Aligned with Population Traits</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Bohan</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiannan</namePart>
      <namePart type="family">Guan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Longxu</namePart>
      <namePart type="family">Dou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yunlong</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dingzirui</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Enbo</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qiguang</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bichen</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiao</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yimeng</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Libo</namePart>
      <namePart type="family">Qin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yanyan</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qingfu</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wanxiang</namePart>
      <namePart type="family">Che</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- "Di Eugenio" is a compound family name; "Di" is not a second given name. -->
      <name type="personal">
        <namePart type="given">Barbara</namePart>
        <namePart type="family">Di Eugenio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Schockaert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The Myers-Briggs Type Indicator (MBTI) is one of the most influential personality theories reflecting individual differences in thinking, feeling, and behaving. MBTI personality detection has garnered considerable research interest and has evolved significantly over the years. However, this task tends to be overly optimistic, as it currently does not align well with the natural distribution of population personality traits. Specifically, the self-reported labels in existing datasets result in data quality issues and the hard labels fail to capture the full range of population personality distributions. In this paper, we identify the task by constructing MBTIBench, the first manually annotated MBTI personality detection dataset with soft labels, under the guidance of psychologists. Our experimental results confirm that soft labels can provide more benefits to other psychological tasks than hard labels. We highlight the polarized predictions and biases in LLMs as key directions for future research.</abstract>
    <identifier type="citekey">li-etal-2025-large</identifier>
    <location>
      <url>https://aclanthology.org/2025.coling-main.339/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>5071</start>
        <end>5081</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Can Large Language Models Understand You Better? An MBTI Personality Detection Dataset Aligned with Population Traits
%A Li, Bohan
%A Guan, Jiannan
%A Dou, Longxu
%A Feng, Yunlong
%A Wang, Dingzirui
%A Xu, Yang
%A Wang, Enbo
%A Chen, Qiguang
%A Wang, Bichen
%A Xu, Xiao
%A Zhang, Yimeng
%A Qin, Libo
%A Zhao, Yanyan
%A Zhu, Qingfu
%A Che, Wanxiang
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F li-etal-2025-large
%X The Myers-Briggs Type Indicator (MBTI) is one of the most influential personality theories reflecting individual differences in thinking, feeling, and behaving. MBTI personality detection has garnered considerable research interest and has evolved significantly over the years. However, this task tends to be overly optimistic, as it currently does not align well with the natural distribution of population personality traits. Specifically, the self-reported labels in existing datasets result in data quality issues and the hard labels fail to capture the full range of population personality distributions. In this paper, we identify the task by constructing MBTIBench, the first manually annotated MBTI personality detection dataset with soft labels, under the guidance of psychologists. Our experimental results confirm that soft labels can provide more benefits to other psychological tasks than hard labels. We highlight the polarized predictions and biases in LLMs as key directions for future research.
%U https://aclanthology.org/2025.coling-main.339/
%P 5071-5081
Markdown (Informal)
[Can Large Language Models Understand You Better? An MBTI Personality Detection Dataset Aligned with Population Traits](https://aclanthology.org/2025.coling-main.339/) (Li et al., COLING 2025)
ACL
- Bohan Li, Jiannan Guan, Longxu Dou, Yunlong Feng, Dingzirui Wang, Yang Xu, Enbo Wang, Qiguang Chen, Bichen Wang, Xiao Xu, Yimeng Zhang, Libo Qin, Yanyan Zhao, Qingfu Zhu, and Wanxiang Che. 2025. Can Large Language Models Understand You Better? An MBTI Personality Detection Dataset Aligned with Population Traits. In Proceedings of the 31st International Conference on Computational Linguistics, pages 5071–5081, Abu Dhabi, UAE. Association for Computational Linguistics.