% Workshop paper, ACL Anthology ID W17-2901 (pages 1--6 of the proceedings volume).
% Braces inside `title`/`booktitle` protect proper nouns and acronyms from style recasing.
@inproceedings{ljubesic-etal-2017-language,
  title     = {Language-independent Gender Prediction on {Twitter}},
  author    = {Ljube{\v{s}}i{\'c}, Nikola and
               Fi{\v{s}}er, Darja and
               Erjavec, Toma{\v{z}}},
  editor    = {Hovy, Dirk and
               Volkova, Svitlana and
               Bamman, David and
               Jurgens, David and
               O{'}Connor, Brendan and
               Tsur, Oren and
               Do{\u{g}}ru{\"o}z, A. Seza},
  booktitle = {Proceedings of the Second Workshop on {NLP} and Computational Social Science},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W17-2901},
  doi       = {10.18653/v1/W17-2901},
  pages     = {1--6},
  abstract  = {In this paper we present a set of experiments and analyses on predicting the gender of Twitter users based on language-independent features extracted either from the text or the metadata of users{'} tweets. We perform our experiments on the TwiSty dataset containing manual gender annotations for users speaking six different languages. Our classification results show that, while the prediction model based on language-independent features performs worse than the bag-of-words model when training and testing on the same language, it regularly outperforms the bag-of-words model when applied to different languages, showing very stable results across various languages. Finally we perform a comparative analysis of feature effect sizes across the six languages and show that differences in our features correspond to cultural distances.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey ljubesic-etal-2017-language. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ljubesic-etal-2017-language">
    <titleInfo>
      <title>Language-independent Gender Prediction on Twitter</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Nikola</namePart>
      <namePart type="family">Ljubešić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Darja</namePart>
      <namePart type="family">Fišer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomaž</namePart>
      <namePart type="family">Erjavec</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on NLP and Computational Social Science</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dirk</namePart>
        <namePart type="family">Hovy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Svitlana</namePart>
        <namePart type="family">Volkova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Bamman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Brendan</namePart>
        <namePart type="family">O’Connor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Oren</namePart>
        <namePart type="family">Tsur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">A</namePart>
        <namePart type="given">Seza</namePart>
        <namePart type="family">Doğruöz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vancouver, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present a set of experiments and analyses on predicting the gender of Twitter users based on language-independent features extracted either from the text or the metadata of users’ tweets. We perform our experiments on the TwiSty dataset containing manual gender annotations for users speaking six different languages. Our classification results show that, while the prediction model based on language-independent features performs worse than the bag-of-words model when training and testing on the same language, it regularly outperforms the bag-of-words model when applied to different languages, showing very stable results across various languages. Finally we perform a comparative analysis of feature effect sizes across the six languages and show that differences in our features correspond to cultural distances.</abstract>
    <!-- Identifiers and online location of the paper. -->
    <identifier type="citekey">ljubesic-etal-2017-language</identifier>
    <identifier type="doi">10.18653/v1/W17-2901</identifier>
    <location>
      <url>https://aclanthology.org/W17-2901</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2017-08</date>
      <extent unit="page">
        <start>1</start>
        <end>6</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Language-independent Gender Prediction on Twitter
%A Ljubešić, Nikola
%A Fišer, Darja
%A Erjavec, Tomaž
%Y Hovy, Dirk
%Y Volkova, Svitlana
%Y Bamman, David
%Y Jurgens, David
%Y O’Connor, Brendan
%Y Tsur, Oren
%Y Doğruöz, A. Seza
%S Proceedings of the Second Workshop on NLP and Computational Social Science
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F ljubesic-etal-2017-language
%X In this paper we present a set of experiments and analyses on predicting the gender of Twitter users based on language-independent features extracted either from the text or the metadata of users’ tweets. We perform our experiments on the TwiSty dataset containing manual gender annotations for users speaking six different languages. Our classification results show that, while the prediction model based on language-independent features performs worse than the bag-of-words model when training and testing on the same language, it regularly outperforms the bag-of-words model when applied to different languages, showing very stable results across various languages. Finally we perform a comparative analysis of feature effect sizes across the six languages and show that differences in our features correspond to cultural distances.
%R 10.18653/v1/W17-2901
%U https://aclanthology.org/W17-2901
%U https://doi.org/10.18653/v1/W17-2901
%P 1-6
Markdown (Informal)
[Language-independent Gender Prediction on Twitter](https://aclanthology.org/W17-2901) (Ljubešić et al., NLP+CSS 2017)
ACL
- Nikola Ljubešić, Darja Fišer, and Tomaž Erjavec. 2017. Language-independent Gender Prediction on Twitter. In Proceedings of the Second Workshop on NLP and Computational Social Science, pages 1–6, Vancouver, Canada. Association for Computational Linguistics.