@inproceedings{costa-jussa-etal-2020-abusive,
title = "Abusive language in {S}panish children and young teenager{'}s conversations: data preparation and short text classification with contextual word embeddings",
author = "Costa-juss{\`a}, Marta R. and
Gonz{\'a}lez, Esther and
Moreno, Asuncion and
Cumalat, Eudald",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.191",
pages = "1533--1537",
abstract = "Abusive texts are reaching the interests of the scientific and social community. How to automatically detect them is onequestion that is gaining interest in the natural language processing community. The main contribution of this paper is toevaluate the quality of the recently developed {''}Spanish Database for cyberbullying prevention{''} for the purpose of trainingclassifiers on detecting abusive short texts. We compare classical machine learning techniques to the use of a more ad-vanced model: the contextual word embeddings in the particular case of classification of abusive short-texts for the Spanishlanguage. As contextual word embeddings, we use Bidirectional Encoder Representation from Transformers (BERT), pro-posed at the end of 2018. We show that BERT mostly outperforms classical techniques. Far beyond the experimentalimpact of our research, this project aims at planting the seeds for an innovative technological tool with a high potentialsocial impact and aiming at being part of the initiatives in artificial intelligence for social good.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="costa-jussa-etal-2020-abusive">
<titleInfo>
<title>Abusive language in Spanish children and young teenager’s conversations: data preparation and short text classification with contextual word embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Esther</namePart>
<namePart type="family">González</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eudald</namePart>
<namePart type="family">Cumalat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Abusive texts are reaching the interests of the scientific and social community. How to automatically detect them is onequestion that is gaining interest in the natural language processing community. The main contribution of this paper is toevaluate the quality of the recently developed ”Spanish Database for cyberbullying prevention” for the purpose of trainingclassifiers on detecting abusive short texts. We compare classical machine learning techniques to the use of a more ad-vanced model: the contextual word embeddings in the particular case of classification of abusive short-texts for the Spanishlanguage. As contextual word embeddings, we use Bidirectional Encoder Representation from Transformers (BERT), pro-posed at the end of 2018. We show that BERT mostly outperforms classical techniques. Far beyond the experimentalimpact of our research, this project aims at planting the seeds for an innovative technological tool with a high potentialsocial impact and aiming at being part of the initiatives in artificial intelligence for social good.</abstract>
<identifier type="citekey">costa-jussa-etal-2020-abusive</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.191</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>1533</start>
<end>1537</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Abusive language in Spanish children and young teenager’s conversations: data preparation and short text classification with contextual word embeddings
%A Costa-jussà, Marta R.
%A González, Esther
%A Moreno, Asuncion
%A Cumalat, Eudald
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F costa-jussa-etal-2020-abusive
%X Abusive texts are reaching the interests of the scientific and social community. How to automatically detect them is onequestion that is gaining interest in the natural language processing community. The main contribution of this paper is toevaluate the quality of the recently developed ”Spanish Database for cyberbullying prevention” for the purpose of trainingclassifiers on detecting abusive short texts. We compare classical machine learning techniques to the use of a more ad-vanced model: the contextual word embeddings in the particular case of classification of abusive short-texts for the Spanishlanguage. As contextual word embeddings, we use Bidirectional Encoder Representation from Transformers (BERT), pro-posed at the end of 2018. We show that BERT mostly outperforms classical techniques. Far beyond the experimentalimpact of our research, this project aims at planting the seeds for an innovative technological tool with a high potentialsocial impact and aiming at being part of the initiatives in artificial intelligence for social good.
%U https://aclanthology.org/2020.lrec-1.191
%P 1533-1537
Markdown (Informal)
[Abusive language in Spanish children and young teenager’s conversations: data preparation and short text classification with contextual word embeddings](https://aclanthology.org/2020.lrec-1.191) (Costa-jussà et al., LREC 2020)
ACL