@inproceedings{bose-etal-2021-unsupervised,
title = "Unsupervised Domain Adaptation in Cross-corpora Abusive Language Detection",
author = "Bose, Tulika and
Illina, Irina and
Fohr, Dominique",
editor = "Ku, Lun-Wei and
Li, Cheng-Te",
booktitle = "Proceedings of the Ninth International Workshop on Natural Language Processing for Social Media",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.socialnlp-1.10",
doi = "10.18653/v1/2021.socialnlp-1.10",
pages = "113--122",
abstract = "The state-of-the-art abusive language detection models report great in-corpus performance, but underperform when evaluated on abusive comments that differ from the training scenario. As human annotation involves substantial time and effort, models that can adapt to newly collected comments can prove to be useful. In this paper, we investigate the effectiveness of several Unsupervised Domain Adaptation (UDA) approaches for the task of cross-corpora abusive language detection. In comparison, we adapt a variant of the BERT model, trained on large-scale abusive comments, using Masked Language Model (MLM) fine-tuning. Our evaluation shows that the UDA approaches result in sub-optimal performance, while the MLM fine-tuning does better in the cross-corpora setting. Detailed analysis reveals the limitations of the UDA approaches and emphasizes the need to build efficient adaptation methods for this task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bose-etal-2021-unsupervised">
<titleInfo>
<title>Unsupervised Domain Adaptation in Cross-corpora Abusive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tulika</namePart>
<namePart type="family">Bose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Illina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominique</namePart>
<namePart type="family">Fohr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Workshop on Natural Language Processing for Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng-Te</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The state-of-the-art abusive language detection models report great in-corpus performance, but underperform when evaluated on abusive comments that differ from the training scenario. As human annotation involves substantial time and effort, models that can adapt to newly collected comments can prove to be useful. In this paper, we investigate the effectiveness of several Unsupervised Domain Adaptation (UDA) approaches for the task of cross-corpora abusive language detection. In comparison, we adapt a variant of the BERT model, trained on large-scale abusive comments, using Masked Language Model (MLM) fine-tuning. Our evaluation shows that the UDA approaches result in sub-optimal performance, while the MLM fine-tuning does better in the cross-corpora setting. Detailed analysis reveals the limitations of the UDA approaches and emphasizes the need to build efficient adaptation methods for this task.</abstract>
<identifier type="citekey">bose-etal-2021-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2021.socialnlp-1.10</identifier>
<location>
<url>https://aclanthology.org/2021.socialnlp-1.10</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>113</start>
<end>122</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Domain Adaptation in Cross-corpora Abusive Language Detection
%A Bose, Tulika
%A Illina, Irina
%A Fohr, Dominique
%Y Ku, Lun-Wei
%Y Li, Cheng-Te
%S Proceedings of the Ninth International Workshop on Natural Language Processing for Social Media
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F bose-etal-2021-unsupervised
%X The state-of-the-art abusive language detection models report great in-corpus performance, but underperform when evaluated on abusive comments that differ from the training scenario. As human annotation involves substantial time and effort, models that can adapt to newly collected comments can prove to be useful. In this paper, we investigate the effectiveness of several Unsupervised Domain Adaptation (UDA) approaches for the task of cross-corpora abusive language detection. In comparison, we adapt a variant of the BERT model, trained on large-scale abusive comments, using Masked Language Model (MLM) fine-tuning. Our evaluation shows that the UDA approaches result in sub-optimal performance, while the MLM fine-tuning does better in the cross-corpora setting. Detailed analysis reveals the limitations of the UDA approaches and emphasizes the need to build efficient adaptation methods for this task.
%R 10.18653/v1/2021.socialnlp-1.10
%U https://aclanthology.org/2021.socialnlp-1.10
%U https://doi.org/10.18653/v1/2021.socialnlp-1.10
%P 113-122
Markdown (Informal)
[Unsupervised Domain Adaptation in Cross-corpora Abusive Language Detection](https://aclanthology.org/2021.socialnlp-1.10) (Bose et al., SocialNLP 2021)
ACL
Tulika Bose, Irina Illina, and Dominique Fohr. 2021. Unsupervised Domain Adaptation in Cross-corpora Abusive Language Detection. In Proceedings of the Ninth International Workshop on Natural Language Processing for Social Media, pages 113–122, Online. Association for Computational Linguistics.
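
The abstract above describes adapting a BERT variant to new abusive-comment corpora with Masked Language Model (MLM) fine-tuning on unlabeled target-corpus text. As a rough illustration of that general technique (a minimal sketch using the Hugging Face transformers and datasets libraries, not the authors' released code), the adaptation step might look like the following; the model name, data file, and hyperparameters are assumptions for illustration only.

```python
# Hypothetical sketch: domain-adaptive MLM fine-tuning of a BERT encoder on
# unlabeled target-corpus comments, in the spirit of the approach the abstract
# describes. Model name, file path, and hyperparameters are illustrative.
from datasets import load_dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

model_name = "bert-base-uncased"  # assumption; the paper adapts a BERT variant trained on abusive comments
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)

# Unlabeled comments from the target corpus, one comment per line (hypothetical file).
raw = load_dataset("text", data_files={"train": "target_corpus_comments.txt"})

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=128)

tokenized = raw["train"].map(tokenize, batched=True, remove_columns=["text"])

# Standard MLM objective: randomly mask 15% of tokens and predict them.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

args = TrainingArguments(
    output_dir="bert-mlm-adapted",
    num_train_epochs=3,               # illustrative values
    per_device_train_batch_size=16,
    learning_rate=5e-5,
)

Trainer(
    model=model,
    args=args,
    train_dataset=tokenized,
    data_collator=collator,
).train()
# The adapted encoder would then be fine-tuned on labeled source-corpus data
# for abusive language classification (not shown).
```

After this unsupervised adaptation step, the encoder would be fine-tuned on labeled source-corpus data and evaluated on the target corpus, matching the cross-corpora setting the abstract describes.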