@InProceedings{li-dickinson:2017:RANLP,
  author    = {Li, Wen and Dickinson, Markus},
  title     = {Gender Prediction for {Chinese} Social Media Data},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {438--445},
  abstract  = {Social media provides users a platform to publish messages and socialize with
	others, and microblogs have gained more users than ever in recent years. With
	such usage, user profiling is a popular task in computational linguistics and
	text mining. Different approaches have been used to predict users' gender,
	age, and other information, but most of this work has been done on English and
	other Western languages. The goal of this project is to predict the gender of
	users based on their posts on Weibo, a Chinese micro-blogging platform. Given
	issues in Chinese word segmentation, we explore character and word n-grams as
	features for this task, as well as using character and word embeddings for
	classification. Given how the data is extracted, we approach the task on a
	per-post basis, and we show the difficulties of the task for both humans and
	computers. Nonetheless, we present encouraging results and point to future
	improvements.},
  doi       = {10.26615/978-954-452-049-6_058},
  url       = {https://doi.org/10.26615/978-954-452-049-6_058},
}

