% Conference paper entry. Proper nouns and acronyms in title/booktitle are
% brace-protected as whole words so that styles which sentence-case titles
% keep their capitalization without disturbing kerning or hyphenation.
@inproceedings{li-dickinson-2017-gender,
    title     = {Gender Prediction for {Chinese} Social Media Data},
    author    = {Li, Wen and
                 Dickinson, Markus},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
    month     = sep,
    year      = {2017},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    url       = {https://doi.org/10.26615/978-954-452-049-6_058},
    doi       = {10.26615/978-954-452-049-6_058},
    pages     = {438--445},
    abstract  = {Social media provides users a platform to publish messages and socialize with others, and microblogs have gained more users than ever in recent years. With such usage, user profiling is a popular task in computational linguistics and text mining. Different approaches have been used to predict users{'} gender, age, and other information, but most of this work has been done on English and other Western languages. The goal of this project is to predict the gender of users based on their posts on Weibo, a Chinese micro-blogging platform. Given issues in Chinese word segmentation, we explore character and word n-grams as features for this task, as well as using character and word embeddings for classification. Given how the data is extracted, we approach the task on a per-post basis, and we show the difficulties of the task for both humans and computers. Nonetheless, we present encouraging results and point to future improvements.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-dickinson-2017-gender">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Gender Prediction for Chinese Social Media Data</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Wen</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Markus</namePart>
      <namePart type="family">Dickinson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Social media provides users a platform to publish messages and socialize with others, and microblogs have gained more users than ever in recent years. With such usage, user profiling is a popular task in computational linguistics and text mining. Different approaches have been used to predict users’ gender, age, and other information, but most of this work has been done on English and other Western languages. The goal of this project is to predict the gender of users based on their posts on Weibo, a Chinese micro-blogging platform. Given issues in Chinese word segmentation, we explore character and word n-grams as features for this task, as well as using character and word embeddings for classification. Given how the data is extracted, we approach the task on a per-post basis, and we show the difficulties of the task for both humans and computers. Nonetheless, we present encouraging results and point to future improvements.</abstract>
    <!-- Identifiers: the citation key shared with the mods ID above, and the DOI. -->
    <identifier type="citekey">li-dickinson-2017-gender</identifier>
    <identifier type="doi">10.26615/978-954-452-049-6_058</identifier>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>438</start>
        <end>445</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Gender Prediction for Chinese Social Media Data
%A Li, Wen
%A Dickinson, Markus
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F li-dickinson-2017-gender
%X Social media provides users a platform to publish messages and socialize with others, and microblogs have gained more users than ever in recent years. With such usage, user profiling is a popular task in computational linguistics and text mining. Different approaches have been used to predict users’ gender, age, and other information, but most of this work has been done on English and other Western languages. The goal of this project is to predict the gender of users based on their posts on Weibo, a Chinese micro-blogging platform. Given issues in Chinese word segmentation, we explore character and word n-grams as features for this task, as well as using character and word embeddings for classification. Given how the data is extracted, we approach the task on a per-post basis, and we show the difficulties of the task for both humans and computers. Nonetheless, we present encouraging results and point to future improvements.
%R 10.26615/978-954-452-049-6_058
%U https://doi.org/10.26615/978-954-452-049-6_058
%P 438-445
Markdown (Informal)
[Gender Prediction for Chinese Social Media Data](https://doi.org/10.26615/978-954-452-049-6_058) (Li & Dickinson, RANLP 2017)
ACL
- Wen Li and Markus Dickinson. 2017. Gender Prediction for Chinese Social Media Data. In Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017, pages 438–445, Varna, Bulgaria. INCOMA Ltd.