@inproceedings{gjurkovic-snajder-2018-reddit,
title = "{R}eddit: A Gold Mine for Personality Prediction",
author = "Gjurkovi{\'c}, Matej and
{\v{S}}najder, Jan",
editor = "Nissim, Malvina and
Patti, Viviana and
Plank, Barbara and
Wagner, Claudia",
booktitle = "Proceedings of the Second Workshop on Computational Modeling of People{'}s Opinions, Personality, and Emotions in Social Media",
month = jun,
year = "2018",
address = "New Orleans, Louisiana, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1112",
doi = "10.18653/v1/W18-1112",
pages = "87--97",
abstract = "Automated personality prediction from social media is gaining increasing attention in natural language processing and social sciences communities. However, due to high labeling costs and privacy issues, the few publicly available datasets are of limited size and low topic diversity. We address this problem by introducing a large-scale dataset derived from Reddit, a source so far overlooked for personality prediction. The dataset is labeled with Myers-Briggs Type Indicators (MBTI) and comes with a rich set of features for more than 9k users. We carry out a preliminary feature analysis, revealing marked differences between the MBTI dimensions and poles. Furthermore, we use the dataset to train and evaluate benchmark personality prediction models, achieving macro F1-scores between 67{\%} and 82{\%} on the individual dimensions and 82{\%} accuracy for exact or one-off accurate type prediction. These results are encouraging and comparable with the reliability of standardized tests.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gjurkovic-snajder-2018-reddit">
<titleInfo>
<title>Reddit: A Gold Mine for Personality Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matej</namePart>
<namePart type="family">Gjurković</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Šnajder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviana</namePart>
<namePart type="family">Patti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Wagner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automated personality prediction from social media is gaining increasing attention in natural language processing and social sciences communities. However, due to high labeling costs and privacy issues, the few publicly available datasets are of limited size and low topic diversity. We address this problem by introducing a large-scale dataset derived from Reddit, a source so far overlooked for personality prediction. The dataset is labeled with Myers-Briggs Type Indicators (MBTI) and comes with a rich set of features for more than 9k users. We carry out a preliminary feature analysis, revealing marked differences between the MBTI dimensions and poles. Furthermore, we use the dataset to train and evaluate benchmark personality prediction models, achieving macro F1-scores between 67% and 82% on the individual dimensions and 82% accuracy for exact or one-off accurate type prediction. These results are encouraging and comparable with the reliability of standardized tests.</abstract>
<identifier type="citekey">gjurkovic-snajder-2018-reddit</identifier>
<identifier type="doi">10.18653/v1/W18-1112</identifier>
<location>
<url>https://aclanthology.org/W18-1112</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>87</start>
<end>97</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Reddit: A Gold Mine for Personality Prediction
%A Gjurković, Matej
%A Šnajder, Jan
%Y Nissim, Malvina
%Y Patti, Viviana
%Y Plank, Barbara
%Y Wagner, Claudia
%S Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana, USA
%F gjurkovic-snajder-2018-reddit
%X Automated personality prediction from social media is gaining increasing attention in natural language processing and social sciences communities. However, due to high labeling costs and privacy issues, the few publicly available datasets are of limited size and low topic diversity. We address this problem by introducing a large-scale dataset derived from Reddit, a source so far overlooked for personality prediction. The dataset is labeled with Myers-Briggs Type Indicators (MBTI) and comes with a rich set of features for more than 9k users. We carry out a preliminary feature analysis, revealing marked differences between the MBTI dimensions and poles. Furthermore, we use the dataset to train and evaluate benchmark personality prediction models, achieving macro F1-scores between 67% and 82% on the individual dimensions and 82% accuracy for exact or one-off accurate type prediction. These results are encouraging and comparable with the reliability of standardized tests.
%R 10.18653/v1/W18-1112
%U https://aclanthology.org/W18-1112
%U https://doi.org/10.18653/v1/W18-1112
%P 87-97
Markdown (Informal)
[Reddit: A Gold Mine for Personality Prediction](https://aclanthology.org/W18-1112) (Gjurković & Šnajder, PEOPLES 2018)
ACL
- Matej Gjurković and Jan Šnajder. 2018. Reddit: A Gold Mine for Personality Prediction. In Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media, pages 87–97, New Orleans, Louisiana, USA. Association for Computational Linguistics.