@inproceedings{luyckx-daelemans-2008-personae,
title = "{P}ersonae: a Corpus for Author and Personality Prediction from Text",
author = "Luyckx, Kim and
Daelemans, Walter",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf",
abstract = "We present a new corpus for computational stylometry, more specifically authorship attribution and the prediction of author personality from text. Because of the large number of authors (145), the corpus will allow previously impossible studies of variation in features considered predictive for writing style. The innovative meta-information (personality profiles of the authors) associated with these texts allows the study of personality prediction, a not yet very well researched aspect of style. In this paper, we describe the contents of the corpus and show its use in both authorship attribution and personality prediction. We focus on features that have been proven useful in the field of author recognition. Syntactic features like part-of-speech n-grams are generally accepted as not being under the authors conscious control and therefore providing good clues for predicting gender or authorship. We want to test whether these features are helpful for personality prediction and authorship attribution on a large set of authors. Both tasks are approached as text categorization tasks. First a document representation is constructed based on feature selection from the linguistically analyzed corpus (using the Memory-Based Shallow Parser (MBSP)). These are associated with each of the 145 authors or each of the four components of the Myers-Briggs Type Indicator (Introverted-Extraverted, Sensing-iNtuitive, Thinking-Feeling, Judging-Perceiving). Authorship attribution on 145 authors achieves results around 50{\%}-accuracy. Preliminary results indicate that the first two personality dimensions can be predicted fairly accurately.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luyckx-daelemans-2008-personae">
<titleInfo>
<title>Personae: a Corpus for Author and Personality Prediction from Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kim</namePart>
<namePart type="family">Luyckx</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walter</namePart>
<namePart type="family">Daelemans</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a new corpus for computational stylometry, more specifically authorship attribution and the prediction of author personality from text. Because of the large number of authors (145), the corpus will allow previously impossible studies of variation in features considered predictive for writing style. The innovative meta-information (personality profiles of the authors) associated with these texts allows the study of personality prediction, a not yet very well researched aspect of style. In this paper, we describe the contents of the corpus and show its use in both authorship attribution and personality prediction. We focus on features that have been proven useful in the field of author recognition. Syntactic features like part-of-speech n-grams are generally accepted as not being under the authors conscious control and therefore providing good clues for predicting gender or authorship. We want to test whether these features are helpful for personality prediction and authorship attribution on a large set of authors. Both tasks are approached as text categorization tasks. First a document representation is constructed based on feature selection from the linguistically analyzed corpus (using the Memory-Based Shallow Parser (MBSP)). These are associated with each of the 145 authors or each of the four components of the Myers-Briggs Type Indicator (Introverted-Extraverted, Sensing-iNtuitive, Thinking-Feeling, Judging-Perceiving). Authorship attribution on 145 authors achieves results around 50%-accuracy. Preliminary results indicate that the first two personality dimensions can be predicted fairly accurately.</abstract>
<identifier type="citekey">luyckx-daelemans-2008-personae</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Personae: a Corpus for Author and Personality Prediction from Text
%A Luyckx, Kim
%A Daelemans, Walter
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F luyckx-daelemans-2008-personae
%X We present a new corpus for computational stylometry, more specifically authorship attribution and the prediction of author personality from text. Because of the large number of authors (145), the corpus will allow previously impossible studies of variation in features considered predictive for writing style. The innovative meta-information (personality profiles of the authors) associated with these texts allows the study of personality prediction, a not yet very well researched aspect of style. In this paper, we describe the contents of the corpus and show its use in both authorship attribution and personality prediction. We focus on features that have been proven useful in the field of author recognition. Syntactic features like part-of-speech n-grams are generally accepted as not being under the authors conscious control and therefore providing good clues for predicting gender or authorship. We want to test whether these features are helpful for personality prediction and authorship attribution on a large set of authors. Both tasks are approached as text categorization tasks. First a document representation is constructed based on feature selection from the linguistically analyzed corpus (using the Memory-Based Shallow Parser (MBSP)). These are associated with each of the 145 authors or each of the four components of the Myers-Briggs Type Indicator (Introverted-Extraverted, Sensing-iNtuitive, Thinking-Feeling, Judging-Perceiving). Authorship attribution on 145 authors achieves results around 50%-accuracy. Preliminary results indicate that the first two personality dimensions can be predicted fairly accurately.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf
Markdown (Informal)
[Personae: a Corpus for Author and Personality Prediction from Text](http://www.lrec-conf.org/proceedings/lrec2008/pdf/759_paper.pdf) (Luyckx & Daelemans, LREC 2008)
ACL