@inproceedings{caron-srivastava-2023-manipulating,
title = "Manipulating the Perceived Personality Traits of Language Models",
author = "Caron, Graham and
Srivastava, Shashank",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.156",
doi = "10.18653/v1/2023.findings-emnlp.156",
pages = "2370--2386",
abstract = "Psychology research has long explored aspects of human personality like extroversion, agreeableness and emotional stability, three of the personality traits that make up the {`}Big Five{'}. Categorizations like the {`}Big Five{'} are commonly used to assess and diagnose personality types. In this work, we explore whether text generated from large language models exhibits consistency in it{'}s perceived {`}Big Five{'} personality traits. For example, is a language model such as GPT2 likely to respond in a consistent way if asked to go out to a party? We also show that when exposed to different types of contexts (such as personality descriptions, or answers to diagnostic questions about personality traits), language models such as BERT and GPT2 consistently identify and mirror personality markers in those contexts. This behavior illustrates an ability to be manipulated in a predictable way (with correlations up to 0.84 between intended and realized changes in personality traits), and frames them as tools for controlling personas in applications such as dialog systems. We contribute two data-sets of personality descriptions of humans subjects.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="caron-srivastava-2023-manipulating">
<titleInfo>
<title>Manipulating the Perceived Personality Traits of Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Caron</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashank</namePart>
<namePart type="family">Srivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Psychology research has long explored aspects of human personality like extroversion, agreeableness and emotional stability, three of the personality traits that make up the ‘Big Five’. Categorizations like the ‘Big Five’ are commonly used to assess and diagnose personality types. In this work, we explore whether text generated from large language models exhibits consistency in its perceived ‘Big Five’ personality traits. For example, is a language model such as GPT2 likely to respond in a consistent way if asked to go out to a party? We also show that when exposed to different types of contexts (such as personality descriptions, or answers to diagnostic questions about personality traits), language models such as BERT and GPT2 consistently identify and mirror personality markers in those contexts. This behavior illustrates an ability to be manipulated in a predictable way (with correlations up to 0.84 between intended and realized changes in personality traits), and frames them as tools for controlling personas in applications such as dialog systems. We contribute two data-sets of personality descriptions of human subjects.</abstract>
<identifier type="citekey">caron-srivastava-2023-manipulating</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.156</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.156</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>2370</start>
<end>2386</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Manipulating the Perceived Personality Traits of Language Models
%A Caron, Graham
%A Srivastava, Shashank
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F caron-srivastava-2023-manipulating
%X Psychology research has long explored aspects of human personality like extroversion, agreeableness and emotional stability, three of the personality traits that make up the ‘Big Five’. Categorizations like the ‘Big Five’ are commonly used to assess and diagnose personality types. In this work, we explore whether text generated from large language models exhibits consistency in its perceived ‘Big Five’ personality traits. For example, is a language model such as GPT2 likely to respond in a consistent way if asked to go out to a party? We also show that when exposed to different types of contexts (such as personality descriptions, or answers to diagnostic questions about personality traits), language models such as BERT and GPT2 consistently identify and mirror personality markers in those contexts. This behavior illustrates an ability to be manipulated in a predictable way (with correlations up to 0.84 between intended and realized changes in personality traits), and frames them as tools for controlling personas in applications such as dialog systems. We contribute two data-sets of personality descriptions of human subjects.
%R 10.18653/v1/2023.findings-emnlp.156
%U https://aclanthology.org/2023.findings-emnlp.156
%U https://doi.org/10.18653/v1/2023.findings-emnlp.156
%P 2370-2386
Markdown (Informal)
[Manipulating the Perceived Personality Traits of Language Models](https://aclanthology.org/2023.findings-emnlp.156) (Caron & Srivastava, Findings 2023)
ACL
Graham Caron and Shashank Srivastava. 2023. Manipulating the Perceived Personality Traits of Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 2370–2386, Singapore. Association for Computational Linguistics.