% Conference-paper entry for the PRHLT-UPV system description at SemEval-2020 Task 12.
% The leading particle of the first author's surname is brace-protected so that
% BibTeX does not parse the lowercase "la" as the end of a von part; the whole
% compound surname stays the family name, as in the MODS record for this paper.
@inproceedings{de-la-pena-sarracen-rosso-2020-prhlt,
  title     = {{PRHLT}-{UPV} at {SemEval}-2020 Task 12: {BERT} for Multilingual Offensive Language Detection},
  author    = {{De la} Pe{\~n}a Sarrac{\'e}n, Gretel Liz and
               Rosso, Paolo},
  editor    = {Herbelot, Aurelie and
               Zhu, Xiaodan and
               Palmer, Alexis and
               Schneider, Nathan and
               May, Jonathan and
               Shutova, Ekaterina},
  booktitle = {Proceedings of the Fourteenth Workshop on Semantic Evaluation},
  month     = dec,
  year      = {2020},
  address   = {Barcelona (online)},
  publisher = {International Committee for Computational Linguistics},
  url       = {https://aclanthology.org/2020.semeval-1.209},
  doi       = {10.18653/v1/2020.semeval-1.209},
  pages     = {1605--1614},
  abstract  = {The present paper describes the system submitted by the PRHLT-UPV team for the task 12 of SemEval-2020: OffensEval 2020. The official title of the task is Multilingual Offensive Language Identification in Social Media, and aims to identify offensive language in texts. The languages included in the task are English, Arabic, Danish, Greek and Turkish. We propose a model based on the BERT architecture for the analysis of texts in English. The approach leverages knowledge within a pre-trained model and performs fine-tuning for the particular task. In the analysis of the other languages the Multilingual BERT is used, which has been pre-trained for a large number of languages. In the experiments, the proposed method for English texts is compared with other approaches to analyze the relevance of the architecture used. Furthermore, simple models for the other languages are evaluated to compare them with the proposed one. The experimental results show that the model based on BERT outperforms other approaches. The main contribution of this work lies in this study, despite not obtaining the first positions in most cases of the competition ranking.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record: the paper itself, with the
     proceedings volume (title, editors, publisher, place) described as its
     host relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="de-la-pena-sarracen-rosso-2020-prhlt">
    <titleInfo>
      <title>PRHLT-UPV at SemEval-2020 Task 12: BERT for Multilingual Offensive Language Detection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Gretel</namePart>
      <namePart type="given">Liz</namePart>
      <namePart type="family">De la Peña Sarracén</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paolo</namePart>
      <namePart type="family">Rosso</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourteenth Workshop on Semantic Evaluation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Aurelie</namePart>
        <namePart type="family">Herbelot</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xiaodan</namePart>
        <namePart type="family">Zhu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexis</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nathan</namePart>
        <namePart type="family">Schneider</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="family">May</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>International Committee for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona (online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The present paper describes the system submitted by the PRHLT-UPV team for the task 12 of SemEval-2020: OffensEval 2020. The official title of the task is Multilingual Offensive Language Identification in Social Media, and aims to identify offensive language in texts. The languages included in the task are English, Arabic, Danish, Greek and Turkish. We propose a model based on the BERT architecture for the analysis of texts in English. The approach leverages knowledge within a pre-trained model and performs fine-tuning for the particular task. In the analysis of the other languages the Multilingual BERT is used, which has been pre-trained for a large number of languages. In the experiments, the proposed method for English texts is compared with other approaches to analyze the relevance of the architecture used. Furthermore, simple models for the other languages are evaluated to compare them with the proposed one. The experimental results show that the model based on BERT outperforms other approaches. The main contribution of this work lies in this study, despite not obtaining the first positions in most cases of the competition ranking.</abstract>
    <identifier type="citekey">de-la-pena-sarracen-rosso-2020-prhlt</identifier>
    <identifier type="doi">10.18653/v1/2020.semeval-1.209</identifier>
    <location>
      <url>https://aclanthology.org/2020.semeval-1.209</url>
    </location>
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>1605</start>
        <end>1614</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T PRHLT-UPV at SemEval-2020 Task 12: BERT for Multilingual Offensive Language Detection
%A De la Peña Sarracén, Gretel Liz
%A Rosso, Paolo
%Y Herbelot, Aurelie
%Y Zhu, Xiaodan
%Y Palmer, Alexis
%Y Schneider, Nathan
%Y May, Jonathan
%Y Shutova, Ekaterina
%S Proceedings of the Fourteenth Workshop on Semantic Evaluation
%D 2020
%8 December
%I International Committee for Computational Linguistics
%C Barcelona (online)
%F de-la-pena-sarracen-rosso-2020-prhlt
%X The present paper describes the system submitted by the PRHLT-UPV team for the task 12 of SemEval-2020: OffensEval 2020. The official title of the task is Multilingual Offensive Language Identification in Social Media, and aims to identify offensive language in texts. The languages included in the task are English, Arabic, Danish, Greek and Turkish. We propose a model based on the BERT architecture for the analysis of texts in English. The approach leverages knowledge within a pre-trained model and performs fine-tuning for the particular task. In the analysis of the other languages the Multilingual BERT is used, which has been pre-trained for a large number of languages. In the experiments, the proposed method for English texts is compared with other approaches to analyze the relevance of the architecture used. Furthermore, simple models for the other languages are evaluated to compare them with the proposed one. The experimental results show that the model based on BERT outperforms other approaches. The main contribution of this work lies in this study, despite not obtaining the first positions in most cases of the competition ranking.
%R 10.18653/v1/2020.semeval-1.209
%U https://aclanthology.org/2020.semeval-1.209
%U https://doi.org/10.18653/v1/2020.semeval-1.209
%P 1605-1614
Markdown (Informal)
[PRHLT-UPV at SemEval-2020 Task 12: BERT for Multilingual Offensive Language Detection](https://aclanthology.org/2020.semeval-1.209) (De la Peña Sarracén & Rosso, SemEval 2020)
ACL