% Workshop paper (WOAH 2022) presenting a Dutch corpus for offensive and abusive language.
% NOTE(review): the address field appears to hold the conference venue rather than the
% publisher's city; left unchanged here -- confirm against the target bibliography style.
@inproceedings{ruitenbeek-etal-2022-zo,
    title     = {{\textquotedblleft}Zo Grof !{\textquotedblright}: A Comprehensive Corpus for Offensive and Abusive Language in {Dutch}},
    author    = {Ruitenbeek, Ward and
                 Zwart, Victor and
                 Van Der Noord, Robin and
                 Gnezdilov, Zhenja and
                 Caselli, Tommaso},
    editor    = {Narang, Kanika and
                 Mostafazadeh Davani, Aida and
                 Mathias, Lambert and
                 Vidgen, Bertie and
                 Talat, Zeerak},
    booktitle = {Proceedings of the Sixth Workshop on Online Abuse and Harms ({WOAH})},
    month     = jul,
    year      = {2022},
    address   = {Seattle, Washington (Hybrid)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.woah-1.5/},
    doi       = {10.18653/v1/2022.woah-1.5},
    pages     = {40--56},
    abstract  = {This paper presents a comprehensive corpus for the study of socially unacceptable language in Dutch. The corpus extends and revises an existing resource with more data and introduces a new annotation dimension for offensive language, making it a unique resource in the Dutch language panorama. Each language phenomenon (abusive and offensive language) in the corpus has been annotated with a multi-layer annotation scheme modelling the explicitness and the target(s) of the message. We have conducted a new set of experiments with different classification algorithms on all annotation dimensions. Monolingual Pre-Trained Language Models prove to be the best systems, obtaining a macro-average F1 of 0.828 for binary classification of offensive language, and 0.579 for the targets of offensive messages. Furthermore, the best system obtains a macro-average F1 of 0.667 for distinguishing between abusive and offensive messages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey ruitenbeek-etal-2022-zo:
     five authors, hosted in a proceedings volume with five editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ruitenbeek-etal-2022-zo">
    <titleInfo>
      <title>“Zo Grof !”: A Comprehensive Corpus for Offensive and Abusive Language in Dutch</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Ward</namePart>
      <namePart type="family">Ruitenbeek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Victor</namePart>
      <namePart type="family">Zwart</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Robin</namePart>
      <namePart type="family">Van Der Noord</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhenja</namePart>
      <namePart type="family">Gnezdilov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tommaso</namePart>
      <namePart type="family">Caselli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kanika</namePart>
        <namePart type="family">Narang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aida</namePart>
        <namePart type="family">Mostafazadeh Davani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lambert</namePart>
        <namePart type="family">Mathias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bertie</namePart>
        <namePart type="family">Vidgen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zeerak</namePart>
        <namePart type="family">Talat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, Washington (Hybrid)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents a comprehensive corpus for the study of socially unacceptable language in Dutch. The corpus extends and revises an existing resource with more data and introduces a new annotation dimension for offensive language, making it a unique resource in the Dutch language panorama. Each language phenomenon (abusive and offensive language) in the corpus has been annotated with a multi-layer annotation scheme modelling the explicitness and the target(s) of the message. We have conducted a new set of experiments with different classification algorithms on all annotation dimensions. Monolingual Pre-Trained Language Models prove to be the best systems, obtaining a macro-average F1 of 0.828 for binary classification of offensive language, and 0.579 for the targets of offensive messages. Furthermore, the best system obtains a macro-average F1 of 0.667 for distinguishing between abusive and offensive messages.</abstract>
    <identifier type="citekey">ruitenbeek-etal-2022-zo</identifier>
    <identifier type="doi">10.18653/v1/2022.woah-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2022.woah-1.5/</url>
    </location>
    <!-- Position within the host volume: issue date and page range -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>40</start>
        <end>56</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T “Zo Grof !”: A Comprehensive Corpus for Offensive and Abusive Language in Dutch
%A Ruitenbeek, Ward
%A Zwart, Victor
%A Van Der Noord, Robin
%A Gnezdilov, Zhenja
%A Caselli, Tommaso
%Y Narang, Kanika
%Y Mostafazadeh Davani, Aida
%Y Mathias, Lambert
%Y Vidgen, Bertie
%Y Talat, Zeerak
%S Proceedings of the Sixth Workshop on Online Abuse and Harms (WOAH)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington (Hybrid)
%F ruitenbeek-etal-2022-zo
%X This paper presents a comprehensive corpus for the study of socially unacceptable language in Dutch. The corpus extends and revises an existing resource with more data and introduces a new annotation dimension for offensive language, making it a unique resource in the Dutch language panorama. Each language phenomenon (abusive and offensive language) in the corpus has been annotated with a multi-layer annotation scheme modelling the explicitness and the target(s) of the message. We have conducted a new set of experiments with different classification algorithms on all annotation dimensions. Monolingual Pre-Trained Language Models prove to be the best systems, obtaining a macro-average F1 of 0.828 for binary classification of offensive language, and 0.579 for the targets of offensive messages. Furthermore, the best system obtains a macro-average F1 of 0.667 for distinguishing between abusive and offensive messages.
%R 10.18653/v1/2022.woah-1.5
%U https://aclanthology.org/2022.woah-1.5/
%U https://doi.org/10.18653/v1/2022.woah-1.5
%P 40-56
Markdown (Informal)
[“Zo Grof !”: A Comprehensive Corpus for Offensive and Abusive Language in Dutch](https://aclanthology.org/2022.woah-1.5/) (Ruitenbeek et al., WOAH 2022)
ACL