% BibTeX record for the VarDial 2023 workshop paper (aclanthology.org/2023.vardial-1.1).
@inproceedings{castillo-lopez-etal-2023-analyzing,
    title     = {Analyzing Zero-Shot transfer Scenarios across {Spanish} variants for Hate Speech Detection},
    author    = {Castillo-l{\'o}pez, Galo and
                 Riabi, Arij and
                 Seddah, Djam{\'e}},
    editor    = {Scherrer, Yves and
                 Jauhiainen, Tommi and
                 Ljube{\v{s}}i{\'c}, Nikola and
                 Nakov, Preslav and
                 Tiedemann, J{\"o}rg and
                 Zampieri, Marcos},
    booktitle = {Tenth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2023)},
    month     = may,
    year      = {2023},
    address   = {Dubrovnik, Croatia},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.vardial-1.1},
    doi       = {10.18653/v1/2023.vardial-1.1},
    pages     = {1--13},
    abstract  = {Hate speech detection in online platforms has been widely studied in the past. Most of these works were conducted in English and a few rich-resource languages. Recent approaches tailored for low-resource languages have explored the interests of zero-shot cross-lingual transfer learning models in resource-scarce scenarios. However, languages variations between geolects such as American English and British English, Latin-American Spanish, and European Spanish is still a problem for NLP models that often relies on (latent) lexical information for their classification tasks. More importantly, the cultural aspect, crucial for hate speech detection, is often overlooked. In this work, we present the results of a thorough analysis of hate speech detection models performance on different variants of Spanish, including a new hate speech toward immigrants Twitter data set we built to cover these variants. Using mBERT and Beto, a monolingual Spanish Bert-based language model, as the basis of our transfer learning architecture, our results indicate that hate speech detection models for a given Spanish variant are affected when different variations of such language are not considered. Hate speech expressions could vary from region to region where the same language is spoken. Our new dataset, models and guidelines are freely available.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="castillo-lopez-etal-2023-analyzing">
<titleInfo>
<title>Analyzing Zero-Shot transfer Scenarios across Spanish variants for Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galo</namePart>
<namePart type="family">Castillo-lópez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arij</namePart>
<namePart type="family">Riabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Djamé</namePart>
<namePart type="family">Seddah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Tenth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hate speech detection in online platforms has been widely studied in the past. Most of these works were conducted in English and a few rich-resource languages. Recent approaches tailored for low-resource languages have explored the interests of zero-shot cross-lingual transfer learning models in resource-scarce scenarios. However, languages variations between geolects such as American English and British English, Latin-American Spanish, and European Spanish is still a problem for NLP models that often relies on (latent) lexical information for their classification tasks. More importantly, the cultural aspect, crucial for hate speech detection, is often overlooked. In this work, we present the results of a thorough analysis of hate speech detection models performance on different variants of Spanish, including a new hate speech toward immigrants Twitter data set we built to cover these variants. Using mBERT and Beto, a monolingual Spanish Bert-based language model, as the basis of our transfer learning architecture, our results indicate that hate speech detection models for a given Spanish variant are affected when different variations of such language are not considered. Hate speech expressions could vary from region to region where the same language is spoken. Our new dataset, models and guidelines are freely available.</abstract>
<identifier type="citekey">castillo-lopez-etal-2023-analyzing</identifier>
<identifier type="doi">10.18653/v1/2023.vardial-1.1</identifier>
<location>
<url>https://aclanthology.org/2023.vardial-1.1</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1</start>
<end>13</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analyzing Zero-Shot transfer Scenarios across Spanish variants for Hate Speech Detection
%A Castillo-lópez, Galo
%A Riabi, Arij
%A Seddah, Djamé
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%Y Zampieri, Marcos
%S Tenth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F castillo-lopez-etal-2023-analyzing
%X Hate speech detection in online platforms has been widely studied in the past. Most of these works were conducted in English and a few rich-resource languages. Recent approaches tailored for low-resource languages have explored the interests of zero-shot cross-lingual transfer learning models in resource-scarce scenarios. However, languages variations between geolects such as American English and British English, Latin-American Spanish, and European Spanish is still a problem for NLP models that often relies on (latent) lexical information for their classification tasks. More importantly, the cultural aspect, crucial for hate speech detection, is often overlooked. In this work, we present the results of a thorough analysis of hate speech detection models performance on different variants of Spanish, including a new hate speech toward immigrants Twitter data set we built to cover these variants. Using mBERT and Beto, a monolingual Spanish Bert-based language model, as the basis of our transfer learning architecture, our results indicate that hate speech detection models for a given Spanish variant are affected when different variations of such language are not considered. Hate speech expressions could vary from region to region where the same language is spoken. Our new dataset, models and guidelines are freely available.
%R 10.18653/v1/2023.vardial-1.1
%U https://aclanthology.org/2023.vardial-1.1
%U https://doi.org/10.18653/v1/2023.vardial-1.1
%P 1-13
Markdown (Informal)
[Analyzing Zero-Shot transfer Scenarios across Spanish variants for Hate Speech Detection](https://aclanthology.org/2023.vardial-1.1) (Castillo-lópez et al., VarDial 2023)
ACL