@inproceedings{hashempour-2019-deep,
title = "A Deep Learning Approach to Language-independent Gender Prediction on {T}witter",
author = "Hashempour, Reyhaneh",
editor = "Axelrod, Amittai and
Yang, Diyi and
Cunha, Rossana and
Shaikh, Samira and
Waseem, Zeerak",
booktitle = "Proceedings of the 2019 Workshop on Widening NLP",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-3630",
pages = "92--94",
abstract = "This work presents a set of experiments conducted to predict the gender of Twitter users based on language-independent features extracted from the text of the users{'} tweets. The experiments were performed on a version of TwiSty dataset including tweets written by the users of six different languages: Portuguese, French, Dutch, English, German, and Italian. Logistic regression (LR), and feed-forward neural networks (FFNN) with back-propagation were used to build models in two different settings: Inter-Lingual (IL) and Cross-Lingual (CL). In the IL setting, the training and testing were performed on the same language whereas in the CL, Italian and German datasets were set aside and only used as test sets and the rest were combined to compose training and development sets. In the IL, the highest accuracy score belongs to LR whereas, in the CL, FFNN with three hidden layers yields the highest score. The results show that neural network based models underperform traditional models when the size of the training set is small; however, they beat traditional models by a non-trivial margin, when they are fed with large enough data. Finally, the feature analysis confirms that men and women have different writing styles independent of their language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hashempour-2019-deep">
<titleInfo>
<title>A Deep Learning Approach to Language-independent Gender Prediction on Twitter</title>
</titleInfo>
<name type="personal">
<namePart type="given">Reyhaneh</namePart>
<namePart type="family">Hashempour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Workshop on Widening NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amittai</namePart>
<namePart type="family">Axelrod</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diyi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rossana</namePart>
<namePart type="family">Cunha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Waseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This work presents a set of experiments conducted to predict the gender of Twitter users based on language-independent features extracted from the text of the users’ tweets. The experiments were performed on a version of TwiSty dataset including tweets written by the users of six different languages: Portuguese, French, Dutch, English, German, and Italian. Logistic regression (LR), and feed-forward neural networks (FFNN) with back-propagation were used to build models in two different settings: Inter-Lingual (IL) and Cross-Lingual (CL). In the IL setting, the training and testing were performed on the same language whereas in the CL, Italian and German datasets were set aside and only used as test sets and the rest were combined to compose training and development sets. In the IL, the highest accuracy score belongs to LR whereas, in the CL, FFNN with three hidden layers yields the highest score. The results show that neural network based models underperform traditional models when the size of the training set is small; however, they beat traditional models by a non-trivial margin, when they are fed with large enough data. Finally, the feature analysis confirms that men and women have different writing styles independent of their language.</abstract>
<identifier type="citekey">hashempour-2019-deep</identifier>
<location>
<url>https://aclanthology.org/W19-3630</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>92</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Deep Learning Approach to Language-independent Gender Prediction on Twitter
%A Hashempour, Reyhaneh
%Y Axelrod, Amittai
%Y Yang, Diyi
%Y Cunha, Rossana
%Y Shaikh, Samira
%Y Waseem, Zeerak
%S Proceedings of the 2019 Workshop on Widening NLP
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F hashempour-2019-deep
%X This work presents a set of experiments conducted to predict the gender of Twitter users based on language-independent features extracted from the text of the users’ tweets. The experiments were performed on a version of TwiSty dataset including tweets written by the users of six different languages: Portuguese, French, Dutch, English, German, and Italian. Logistic regression (LR), and feed-forward neural networks (FFNN) with back-propagation were used to build models in two different settings: Inter-Lingual (IL) and Cross-Lingual (CL). In the IL setting, the training and testing were performed on the same language whereas in the CL, Italian and German datasets were set aside and only used as test sets and the rest were combined to compose training and development sets. In the IL, the highest accuracy score belongs to LR whereas, in the CL, FFNN with three hidden layers yields the highest score. The results show that neural network based models underperform traditional models when the size of the training set is small; however, they beat traditional models by a non-trivial margin, when they are fed with large enough data. Finally, the feature analysis confirms that men and women have different writing styles independent of their language.
%U https://aclanthology.org/W19-3630
%P 92-94
Markdown (Informal)
[A Deep Learning Approach to Language-independent Gender Prediction on Twitter](https://aclanthology.org/W19-3630) (Hashempour, WiNLP 2019)
ACL