% Workshop paper in the BUCC 2021 proceedings; record taken from aclanthology.org.
% Case-sensitive words are protected as whole words ({Dutch}, {BUCC}) so that
% sentence-casing styles keep their capitals without breaking kerning.
@inproceedings{burtenshaw-kestemont-2021-dutch,
  title     = {A {Dutch} Dataset for Cross-lingual Multilabel Toxicity Detection},
  author    = {Burtenshaw, Ben and Kestemont, Mike},
  editor    = {Rapp, Reinhard and Sharoff, Serge and Zweigenbaum, Pierre},
  booktitle = {Proceedings of the 14th Workshop on Building and Using Comparable Corpora ({BUCC} 2021)},
  month     = sep,
  year      = {2021},
  address   = {Online (Virtual Mode)},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/2021.bucc-1.10},
  pages     = {75--79},
  abstract  = {Multi-label toxicity detection is highly prominent, with many research groups, companies, and individuals engaging with it through shared tasks and dedicated venues. This paper describes a cross-lingual approach to annotating multi-label text classification on a newly developed Dutch language dataset, using a model trained on English data. We present an ensemble model of one Transformer model and an LSTM using Multilingual embeddings. The combination of multilingual embeddings and the Transformer model improves performance in a cross-lingual setting.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey burtenshaw-kestemont-2021-dutch: the paper itself,
     followed by its host proceedings volume as a relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="burtenshaw-kestemont-2021-dutch">
    <titleInfo>
      <title>A Dutch Dataset for Cross-lingual Multilabel Toxicity Detection</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Ben</namePart>
      <namePart type="family">Burtenshaw</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mike</namePart>
      <namePart type="family">Kestemont</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host volume: proceedings title, editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Reinhard</namePart>
        <namePart type="family">Rapp</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Serge</namePart>
        <namePart type="family">Sharoff</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pierre</namePart>
        <namePart type="family">Zweigenbaum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd.</publisher>
        <place>
          <placeTerm type="text">Online (Virtual Mode)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Multi-label toxicity detection is highly prominent, with many research groups, companies, and individuals engaging with it through shared tasks and dedicated venues. This paper describes a cross-lingual approach to annotating multi-label text classification on a newly developed Dutch language dataset, using a model trained on English data. We present an ensemble model of one Transformer model and an LSTM using Multilingual embeddings. The combination of multilingual embeddings and the Transformer model improves performance in a cross-lingual setting.</abstract>
    <identifier type="citekey">burtenshaw-kestemont-2021-dutch</identifier>
    <location>
      <url>https://aclanthology.org/2021.bucc-1.10</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2021-09</date>
      <extent unit="page">
        <start>75</start>
        <end>79</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Dutch Dataset for Cross-lingual Multilabel Toxicity Detection
%A Burtenshaw, Ben
%A Kestemont, Mike
%Y Rapp, Reinhard
%Y Sharoff, Serge
%Y Zweigenbaum, Pierre
%S Proceedings of the 14th Workshop on Building and Using Comparable Corpora (BUCC 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Online (Virtual Mode)
%F burtenshaw-kestemont-2021-dutch
%X Multi-label toxicity detection is highly prominent, with many research groups, companies, and individuals engaging with it through shared tasks and dedicated venues. This paper describes a cross-lingual approach to annotating multi-label text classification on a newly developed Dutch language dataset, using a model trained on English data. We present an ensemble model of one Transformer model and an LSTM using Multilingual embeddings. The combination of multilingual embeddings and the Transformer model improves performance in a cross-lingual setting.
%U https://aclanthology.org/2021.bucc-1.10
%P 75-79
Markdown (Informal)
[A Dutch Dataset for Cross-lingual Multilabel Toxicity Detection](https://aclanthology.org/2021.bucc-1.10) (Burtenshaw & Kestemont, BUCC 2021)
ACL