@inproceedings{pethe-skiena-2019-trumpiest,
title = "The Trumpiest Trump? Identifying a Subject{'}s Most Characteristic Tweets",
author = "Pethe, Charuta and
Skiena, Steve",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1175",
doi = "10.18653/v1/D19-1175",
pages = "1653--1663",
abstract = "The sequence of documents produced by any given author varies in style and content, but some documents are more typical or representative of the source than others. We quantify the extent to which a given short text is characteristic of a specific person, using a dataset of tweets from fifteen celebrities. Such analysis is useful for generating excerpts of high-volume Twitter profiles, and understanding how representativeness relates to tweet popularity. We first consider the related task of binary author detection (is x the author of text T?), and report a test accuracy of 90.37{\%} for the best of five approaches to this problem. We then use these models to compute characterization scores among all of an author{'}s texts. A user study shows human evaluators agree with our characterization model for all 15 celebrities in our dataset, each with p-value {\textless} 0.05. We use these classifiers to show surprisingly strong correlations between characterization scores and the popularity of the associated texts. Indeed, we demonstrate a statistically significant correlation between this score and tweet popularity (likes/replies/retweets) for 13 of the 15 celebrities in our study.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pethe-skiena-2019-trumpiest">
<titleInfo>
<title>The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Charuta</namePart>
<namePart type="family">Pethe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steve</namePart>
<namePart type="family">Skiena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The sequence of documents produced by any given author varies in style and content, but some documents are more typical or representative of the source than others. We quantify the extent to which a given short text is characteristic of a specific person, using a dataset of tweets from fifteen celebrities. Such analysis is useful for generating excerpts of high-volume Twitter profiles, and understanding how representativeness relates to tweet popularity. We first consider the related task of binary author detection (is x the author of text T?), and report a test accuracy of 90.37% for the best of five approaches to this problem. We then use these models to compute characterization scores among all of an author’s texts. A user study shows human evaluators agree with our characterization model for all 15 celebrities in our dataset, each with p-value &lt; 0.05. We use these classifiers to show surprisingly strong correlations between characterization scores and the popularity of the associated texts. Indeed, we demonstrate a statistically significant correlation between this score and tweet popularity (likes/replies/retweets) for 13 of the 15 celebrities in our study.</abstract>
<identifier type="citekey">pethe-skiena-2019-trumpiest</identifier>
<identifier type="doi">10.18653/v1/D19-1175</identifier>
<location>
<url>https://aclanthology.org/D19-1175</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>1653</start>
<end>1663</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets
%A Pethe, Charuta
%A Skiena, Steve
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F pethe-skiena-2019-trumpiest
%X The sequence of documents produced by any given author varies in style and content, but some documents are more typical or representative of the source than others. We quantify the extent to which a given short text is characteristic of a specific person, using a dataset of tweets from fifteen celebrities. Such analysis is useful for generating excerpts of high-volume Twitter profiles, and understanding how representativeness relates to tweet popularity. We first consider the related task of binary author detection (is x the author of text T?), and report a test accuracy of 90.37% for the best of five approaches to this problem. We then use these models to compute characterization scores among all of an author’s texts. A user study shows human evaluators agree with our characterization model for all 15 celebrities in our dataset, each with p-value < 0.05. We use these classifiers to show surprisingly strong correlations between characterization scores and the popularity of the associated texts. Indeed, we demonstrate a statistically significant correlation between this score and tweet popularity (likes/replies/retweets) for 13 of the 15 celebrities in our study.
%R 10.18653/v1/D19-1175
%U https://aclanthology.org/D19-1175
%U https://doi.org/10.18653/v1/D19-1175
%P 1653-1663
Markdown (Informal)
[The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets](https://aclanthology.org/D19-1175) (Pethe & Skiena, EMNLP-IJCNLP 2019)
ACL
- Charuta Pethe and Steve Skiena. 2019. The Trumpiest Trump? Identifying a Subject’s Most Characteristic Tweets. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 1653–1663, Hong Kong, China. Association for Computational Linguistics.