@inproceedings{sezerer-etal-2019-turkish,
title = "A {T}urkish Dataset for Gender Identification of {T}witter Users",
author = "Sezerer, Erhan and
Polatbilek, Ozan and
Tekir, Selma",
editor = "Friedrich, Annemarie and
Zeyrek, Deniz and
Hoek, Jet",
booktitle = "Proceedings of the 13th Linguistic Annotation Workshop",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4023",
doi = "10.18653/v1/W19-4023",
pages = "203--207",
abstract = "Author profiling is the identification of an author{'}s gender, age, and language from his/her texts. With the increasing trend of using Twitter as a means to express thought, profiling the gender of an author from his/her tweets has become a challenge. Although several datasets in different languages have been released on this problem, there is still a need for multilingualism. In this work, we propose a dataset of tweets of Turkish Twitter users which are labeled with their gender information. The dataset has 3368 users in training set and 1924 users in test set where each user has 100 tweets. The dataset is publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sezerer-etal-2019-turkish">
<titleInfo>
<title>A Turkish Dataset for Gender Identification of Twitter Users</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erhan</namePart>
<namePart type="family">Sezerer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ozan</namePart>
<namePart type="family">Polatbilek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Selma</namePart>
<namePart type="family">Tekir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Linguistic Annotation Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Zeyrek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jet</namePart>
<namePart type="family">Hoek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Author profiling is the identification of an author’s gender, age, and language from his/her texts. With the increasing trend of using Twitter as a means to express thought, profiling the gender of an author from his/her tweets has become a challenge. Although several datasets in different languages have been released on this problem, there is still a need for multilingualism. In this work, we propose a dataset of tweets of Turkish Twitter users which are labeled with their gender information. The dataset has 3368 users in training set and 1924 users in test set where each user has 100 tweets. The dataset is publicly available.</abstract>
<identifier type="citekey">sezerer-etal-2019-turkish</identifier>
<identifier type="doi">10.18653/v1/W19-4023</identifier>
<location>
<url>https://aclanthology.org/W19-4023</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>203</start>
<end>207</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Turkish Dataset for Gender Identification of Twitter Users
%A Sezerer, Erhan
%A Polatbilek, Ozan
%A Tekir, Selma
%Y Friedrich, Annemarie
%Y Zeyrek, Deniz
%Y Hoek, Jet
%S Proceedings of the 13th Linguistic Annotation Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F sezerer-etal-2019-turkish
%X Author profiling is the identification of an author’s gender, age, and language from his/her texts. With the increasing trend of using Twitter as a means to express thought, profiling the gender of an author from his/her tweets has become a challenge. Although several datasets in different languages have been released on this problem, there is still a need for multilingualism. In this work, we propose a dataset of tweets of Turkish Twitter users which are labeled with their gender information. The dataset has 3368 users in training set and 1924 users in test set where each user has 100 tweets. The dataset is publicly available.
%R 10.18653/v1/W19-4023
%U https://aclanthology.org/W19-4023
%U https://doi.org/10.18653/v1/W19-4023
%P 203-207
Markdown (Informal)
[A Turkish Dataset for Gender Identification of Twitter Users](https://aclanthology.org/W19-4023) (Sezerer et al., LAW 2019)
ACL