@inproceedings{de-la-pena-sarracen-rosso-2022-unsupervised,
title = "Unsupervised Embeddings with Graph Auto-Encoders for Multi-domain and Multilingual Hate Speech Detection",
author = "De la Pe{\~n}a Sarrac{\'e}n, Gretel Liz and
Rosso, Paolo",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.236",
pages = "2196--2204",
abstract = "Hate speech detection is a prominent and challenging task, since hate messages are often expressed in subtle ways and with characteristics that may vary depending on the author. Hence, many models suffer from the generalization problem. However, retrieving and monitoring hateful content on social media is a current necessity. In this paper, we propose an unsupervised approach using Graph Auto-Encoders (GAE), which allows us to avoid using labeled data when training the representation of the texts. Specifically, we represent texts as nodes of a graph, and use a transformer layer together with a convolutional layer to encode these nodes in a low-dimensional space. As a result, we obtain embeddings that can be decoded into a reconstruction of the original network. Our main idea is to learn a model with a set of texts without supervision, in order to generate embeddings for the nodes: nodes with the same label should be close in the embedding space, which, in turn, should allow us to distinguish among classes. We employ this strategy to detect hate speech in multi-domain and multilingual sets of texts, where our method shows competitive results on small datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-la-pena-sarracen-rosso-2022-unsupervised">
    <titleInfo>
        <title>Unsupervised Embeddings with Graph Auto-Encoders for Multi-domain and Multilingual Hate Speech Detection</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Gretel</namePart>
        <namePart type="given">Liz</namePart>
        <namePart type="family">De la Peña Sarracén</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Paolo</namePart>
        <namePart type="family">Rosso</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
        </titleInfo>
        <originInfo>
            <publisher>European Language Resources Association</publisher>
            <place>
                <placeTerm type="text">Marseille, France</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Hate speech detection is a prominent and challenging task, since hate messages are often expressed in subtle ways and with characteristics that may vary depending on the author. Hence, many models suffer from the generalization problem. However, retrieving and monitoring hateful content on social media is a current necessity. In this paper, we propose an unsupervised approach using Graph Auto-Encoders (GAE), which allows us to avoid using labeled data when training the representation of the texts. Specifically, we represent texts as nodes of a graph, and use a transformer layer together with a convolutional layer to encode these nodes in a low-dimensional space. As a result, we obtain embeddings that can be decoded into a reconstruction of the original network. Our main idea is to learn a model with a set of texts without supervision, in order to generate embeddings for the nodes: nodes with the same label should be close in the embedding space, which, in turn, should allow us to distinguish among classes. We employ this strategy to detect hate speech in multi-domain and multilingual sets of texts, where our method shows competitive results on small datasets.</abstract>
    <identifier type="citekey">de-la-pena-sarracen-rosso-2022-unsupervised</identifier>
    <location>
        <url>https://aclanthology.org/2022.lrec-1.236</url>
    </location>
    <part>
        <date>2022-06</date>
        <extent unit="page">
            <start>2196</start>
            <end>2204</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Embeddings with Graph Auto-Encoders for Multi-domain and Multilingual Hate Speech Detection
%A De la Peña Sarracén, Gretel Liz
%A Rosso, Paolo
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F de-la-pena-sarracen-rosso-2022-unsupervised
%X Hate speech detection is a prominent and challenging task, since hate messages are often expressed in subtle ways and with characteristics that may vary depending on the author. Hence, many models suffer from the generalization problem. However, retrieving and monitoring hateful content on social media is a current necessity. In this paper, we propose an unsupervised approach using Graph Auto-Encoders (GAE), which allows us to avoid using labeled data when training the representation of the texts. Specifically, we represent texts as nodes of a graph, and use a transformer layer together with a convolutional layer to encode these nodes in a low-dimensional space. As a result, we obtain embeddings that can be decoded into a reconstruction of the original network. Our main idea is to learn a model with a set of texts without supervision, in order to generate embeddings for the nodes: nodes with the same label should be close in the embedding space, which, in turn, should allow us to distinguish among classes. We employ this strategy to detect hate speech in multi-domain and multilingual sets of texts, where our method shows competitive results on small datasets.
%U https://aclanthology.org/2022.lrec-1.236
%P 2196-2204
Markdown (Informal)
[Unsupervised Embeddings with Graph Auto-Encoders for Multi-domain and Multilingual Hate Speech Detection](https://aclanthology.org/2022.lrec-1.236) (De la Peña Sarracén & Rosso, LREC 2022)
ACL
Gretel Liz De la Peña Sarracén and Paolo Rosso. 2022. Unsupervised Embeddings with Graph Auto-Encoders for Multi-domain and Multilingual Hate Speech Detection. In Proceedings of the Thirteenth Language Resources and Evaluation Conference, pages 2196–2204, Marseille, France. European Language Resources Association.
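
For readers who want a concrete picture of the architecture sketched in the abstract (texts as graph nodes, a transformer layer together with a convolutional layer encoding nodes into a low-dimensional space, and a decoder that reconstructs the original network), here is a minimal PyTorch sketch of such a graph auto-encoder trained only on graph reconstruction. This is an illustrative assumption, not the authors' implementation: the class and function names (GraphAutoEncoder, reconstruction_loss), the inner-product decoder, and all dimensions are hypothetical.

```python
# Minimal sketch of an unsupervised graph auto-encoder (GAE) along the lines the
# abstract describes; not the paper's code, all names and dimensions are assumptions.
import torch
import torch.nn as nn
import torch.nn.functional as F


class GraphAutoEncoder(nn.Module):
    def __init__(self, feat_dim: int, hidden_dim: int, emb_dim: int, n_heads: int = 4):
        super().__init__()
        # Transformer layer: contextualizes node (text) features against each other.
        self.transformer = nn.TransformerEncoderLayer(
            d_model=feat_dim, nhead=n_heads, dim_feedforward=hidden_dim, batch_first=True
        )
        # Graph-convolution weight: projects aggregated neighborhoods to a low-dimensional space.
        self.gcn_weight = nn.Linear(feat_dim, emb_dim, bias=False)

    def encode(self, x: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # x: (num_nodes, feat_dim) node features, e.g. sentence embeddings of the texts.
        # adj: (num_nodes, num_nodes) adjacency matrix with self-loops.
        h = self.transformer(x.unsqueeze(0)).squeeze(0)                 # transformer layer
        a_norm = adj / adj.sum(dim=1, keepdim=True).clamp(min=1.0)      # row-normalized adjacency
        return F.relu(self.gcn_weight(a_norm @ h))                      # convolutional layer -> embeddings

    def decode(self, z: torch.Tensor) -> torch.Tensor:
        # Inner-product decoder: edge probabilities that reconstruct the original network.
        return torch.sigmoid(z @ z.t())

    def forward(self, x: torch.Tensor, adj: torch.Tensor):
        z = self.encode(x, adj)
        return self.decode(z), z


def reconstruction_loss(adj_pred: torch.Tensor, adj_true: torch.Tensor) -> torch.Tensor:
    # Unsupervised objective: no hate-speech labels are used during training.
    return F.binary_cross_entropy(adj_pred, adj_true)


if __name__ == "__main__":
    # Toy usage with random data; real node features would come from a text encoder.
    n, feat_dim = 10, 64
    model = GraphAutoEncoder(feat_dim=feat_dim, hidden_dim=128, emb_dim=16)
    x = torch.randn(n, feat_dim)
    adj = (torch.rand(n, n) > 0.7).float()
    adj = ((adj + adj.t() + torch.eye(n)) > 0).float()                  # symmetrize, add self-loops
    adj_pred, z = model(x, adj)
    print(reconstruction_loss(adj_pred, adj).item(), z.shape)
```

Under this reading, the learned node embeddings z would then be compared or clustered downstream (e.g. nearest-neighbor or threshold-based decisions) to separate hateful from non-hateful texts, since the training itself never sees labels.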