@inproceedings{elsafoury-etal-2022-comparative,
  author    = {Elsafoury, Fatma and Wilson, Steven R. and Ramzan, Naeem},
  title     = {A Comparative Study on Word Embeddings and Social {NLP} Tasks},
  editor    = {Ku, Lun-Wei and Li, Cheng-Te and Tsai, Yu-Che and Wang, Wei-Yao},
  booktitle = {Proceedings of the Tenth International Workshop on Natural Language Processing for Social Media},
  month     = jul,
  year      = {2022},
  address   = {Seattle, Washington},
  publisher = {Association for Computational Linguistics},
  pages     = {55--64},
  doi       = {10.18653/v1/2022.socialnlp-1.5},
  url       = {https://aclanthology.org/2022.socialnlp-1.5},
  abstract  = {In recent years, gray social media platforms, those with a loose moderation policy on cyberbullying, have been attracting more users. Recently, data collected from these types of platforms have been used to pre-train word embeddings (social-media-based), yet these word embeddings have not been investigated for social NLP related tasks. In this paper, we carried out a comparative study between social-media-based and non-social-media-based word embeddings on two social NLP tasks: Detecting cyberbullying and Measuring social bias. Our results show that using social-media-based word embeddings as input features, rather than non-social-media-based embeddings, leads to better cyberbullying detection performance. We also show that some word embeddings are more useful than others for categorizing offensive words. However, we do not find strong evidence that certain word embeddings will necessarily work best when identifying certain categories of cyberbullying within our datasets. Finally, We show even though most of the state-of-the-art bias metrics ranked social-media-based word embeddings as the most socially biased, these results remain inconclusive and further research is required.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation keyed elsafoury-etal-2022-comparative. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="elsafoury-etal-2022-comparative">
    <titleInfo>
      <title>A Comparative Study on Word Embeddings and Social NLP Tasks</title>
    </titleInfo>
    <!-- Authors of the paper, in citation order. -->
    <name type="personal">
      <namePart type="given">Fatma</namePart>
      <namePart type="family">Elsafoury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Steven</namePart>
      <namePart type="given">R</namePart>
      <namePart type="family">Wilson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Naeem</namePart>
      <namePart type="family">Ramzan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth International Workshop on Natural Language Processing for Social Media</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Cheng-Te</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yu-Che</namePart>
        <namePart type="family">Tsai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wei-Yao</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, Washington</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In recent years, gray social media platforms, those with a loose moderation policy on cyberbullying, have been attracting more users. Recently, data collected from these types of platforms have been used to pre-train word embeddings (social-media-based), yet these word embeddings have not been investigated for social NLP related tasks. In this paper, we carried out a comparative study between social-media-based and non-social-media-based word embeddings on two social NLP tasks: Detecting cyberbullying and Measuring social bias. Our results show that using social-media-based word embeddings as input features, rather than non-social-media-based embeddings, leads to better cyberbullying detection performance. We also show that some word embeddings are more useful than others for categorizing offensive words. However, we do not find strong evidence that certain word embeddings will necessarily work best when identifying certain categories of cyberbullying within our datasets. Finally, We show even though most of the state-of-the-art bias metrics ranked social-media-based word embeddings as the most socially biased, these results remain inconclusive and further research is required.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">elsafoury-etal-2022-comparative</identifier>
    <identifier type="doi">10.18653/v1/2022.socialnlp-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2022.socialnlp-1.5</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>55</start>
        <end>64</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparative Study on Word Embeddings and Social NLP Tasks
%A Elsafoury, Fatma
%A Wilson, Steven R.
%A Ramzan, Naeem
%Y Ku, Lun-Wei
%Y Li, Cheng-Te
%Y Tsai, Yu-Che
%Y Wang, Wei-Yao
%S Proceedings of the Tenth International Workshop on Natural Language Processing for Social Media
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F elsafoury-etal-2022-comparative
%X In recent years, gray social media platforms, those with a loose moderation policy on cyberbullying, have been attracting more users. Recently, data collected from these types of platforms have been used to pre-train word embeddings (social-media-based), yet these word embeddings have not been investigated for social NLP related tasks. In this paper, we carried out a comparative study between social-media-based and non-social-media-based word embeddings on two social NLP tasks: Detecting cyberbullying and Measuring social bias. Our results show that using social-media-based word embeddings as input features, rather than non-social-media-based embeddings, leads to better cyberbullying detection performance. We also show that some word embeddings are more useful than others for categorizing offensive words. However, we do not find strong evidence that certain word embeddings will necessarily work best when identifying certain categories of cyberbullying within our datasets. Finally, We show even though most of the state-of-the-art bias metrics ranked social-media-based word embeddings as the most socially biased, these results remain inconclusive and further research is required.
%R 10.18653/v1/2022.socialnlp-1.5
%U https://aclanthology.org/2022.socialnlp-1.5
%U https://doi.org/10.18653/v1/2022.socialnlp-1.5
%P 55-64
Markdown (Informal)
[A Comparative Study on Word Embeddings and Social NLP Tasks](https://aclanthology.org/2022.socialnlp-1.5) (Elsafoury et al., SocialNLP 2022)
ACL
- Fatma Elsafoury, Steven R. Wilson, and Naeem Ramzan. 2022. A Comparative Study on Word Embeddings and Social NLP Tasks. In Proceedings of the Tenth International Workshop on Natural Language Processing for Social Media, pages 55–64, Seattle, Washington. Association for Computational Linguistics.