@inproceedings{diaz-torres-etal-2020-automatic,
title = "Automatic Detection of Offensive Language in Social Media: Defining Linguistic Criteria to build a {M}exican {S}panish Dataset",
author = "D{\'\i}az-Torres, Mar{\'\i}a Jos{\'e} and
Mor{\'a}n-M{\'e}ndez, Paulina Alejandra and
Villasenor-Pineda, Luis and
Montes-y-G{\'o}mez, Manuel and
Aguilera, Juan and
Meneses-Ler{\'\i}n, Luis",
editor = "Kumar, Ritesh and
Ojha, Atul Kr. and
Lahiri, Bornini and
Zampieri, Marcos and
Malmasi, Shervin and
Murdock, Vanessa and
Kadar, Daniel",
booktitle = "Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/2020.trac-1.21",
pages = "132--136",
abstract = "Phenomena such as bullying, homophobia, sexism and racism have transcended to social networks, motivating the development of tools for their automatic detection. The challenge becomes greater for languages rich in popular sayings, colloquial expressions and idioms which may contain vulgar, profane or rude words, but not always have the intention of offending, as is the case of Mexican Spanish. Under these circumstances, the identification of the offense goes beyond the lexical and syntactic elements of the message. This first work aims to define the main linguistic features of aggressive, offensive and vulgar language in social networks in order to establish linguistic-based criteria to facilitate the identification of abusive language. For this purpose, a Mexican Spanish Twitter corpus was compiled and analyzed. The dataset included words that, despite being rude, need to be considered in context to determine they are part of an offense. Based on the analysis of this corpus, linguistic criteria were defined to determine whether a message is offensive. To simplify the application of these criteria, an easy-to-follow diagram was designed. The paper presents an example of the use of the diagram, as well as the basic statistics of the corpus.",
language = "English",
ISBN = "979-10-95546-56-6",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="diaz-torres-etal-2020-automatic">
<titleInfo>
<title>Automatic Detection of Offensive Language in Social Media: Defining Linguistic Criteria to build a Mexican Spanish Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">María</namePart>
<namePart type="given">José</namePart>
<namePart type="family">Díaz-Torres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paulina</namePart>
<namePart type="given">Alejandra</namePart>
<namePart type="family">Morán-Méndez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Villasenor-Pineda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Montes-y-Gómez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Aguilera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Meneses-Lerín</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bornini</namePart>
<namePart type="family">Lahiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vanessa</namePart>
<namePart type="family">Murdock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Kadar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-56-6</identifier>
</relatedItem>
<abstract>Phenomena such as bullying, homophobia, sexism and racism have transcended to social networks, motivating the development of tools for their automatic detection. The challenge becomes greater for languages rich in popular sayings, colloquial expressions and idioms which may contain vulgar, profane or rude words, but not always have the intention of offending, as is the case of Mexican Spanish. Under these circumstances, the identification of the offense goes beyond the lexical and syntactic elements of the message. This first work aims to define the main linguistic features of aggressive, offensive and vulgar language in social networks in order to establish linguistic-based criteria to facilitate the identification of abusive language. For this purpose, a Mexican Spanish Twitter corpus was compiled and analyzed. The dataset included words that, despite being rude, need to be considered in context to determine they are part of an offense. Based on the analysis of this corpus, linguistic criteria were defined to determine whether a message is offensive. To simplify the application of these criteria, an easy-to-follow diagram was designed. The paper presents an example of the use of the diagram, as well as the basic statistics of the corpus.</abstract>
<identifier type="citekey">diaz-torres-etal-2020-automatic</identifier>
<location>
<url>https://aclanthology.org/2020.trac-1.21</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>132</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Detection of Offensive Language in Social Media: Defining Linguistic Criteria to build a Mexican Spanish Dataset
%A Díaz-Torres, María José
%A Morán-Méndez, Paulina Alejandra
%A Villasenor-Pineda, Luis
%A Montes-y-Gómez, Manuel
%A Aguilera, Juan
%A Meneses-Lerín, Luis
%Y Kumar, Ritesh
%Y Ojha, Atul Kr.
%Y Lahiri, Bornini
%Y Zampieri, Marcos
%Y Malmasi, Shervin
%Y Murdock, Vanessa
%Y Kadar, Daniel
%S Proceedings of the Second Workshop on Trolling, Aggression and Cyberbullying
%D 2020
%8 May
%I European Language Resources Association (ELRA)
%C Marseille, France
%@ 979-10-95546-56-6
%G English
%F diaz-torres-etal-2020-automatic
%X Phenomena such as bullying, homophobia, sexism and racism have transcended to social networks, motivating the development of tools for their automatic detection. The challenge becomes greater for languages rich in popular sayings, colloquial expressions and idioms which may contain vulgar, profane or rude words, but not always have the intention of offending, as is the case of Mexican Spanish. Under these circumstances, the identification of the offense goes beyond the lexical and syntactic elements of the message. This first work aims to define the main linguistic features of aggressive, offensive and vulgar language in social networks in order to establish linguistic-based criteria to facilitate the identification of abusive language. For this purpose, a Mexican Spanish Twitter corpus was compiled and analyzed. The dataset included words that, despite being rude, need to be considered in context to determine they are part of an offense. Based on the analysis of this corpus, linguistic criteria were defined to determine whether a message is offensive. To simplify the application of these criteria, an easy-to-follow diagram was designed. The paper presents an example of the use of the diagram, as well as the basic statistics of the corpus.
%U https://aclanthology.org/2020.trac-1.21
%P 132-136
Markdown (Informal)
[Automatic Detection of Offensive Language in Social Media: Defining Linguistic Criteria to build a Mexican Spanish Dataset](https://aclanthology.org/2020.trac-1.21) (Díaz-Torres et al., TRAC 2020)
ACL