@inproceedings{bose-etal-2021-generalisability,
title = "Generalisability of Topic Models in Cross-corpora Abusive Language Detection",
author = "Bose, Tulika and
Illina, Irina and
Fohr, Dominique",
booktitle = "Proceedings of the Fourth Workshop on NLP for Internet Freedom: Censorship, Disinformation, and Propaganda",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nlp4if-1.8",
doi = "10.18653/v1/2021.nlp4if-1.8",
pages = "51--56",
abstract = "Rapidly changing social media content calls for robust and generalisable abuse detection models. However, the state-of-the-art supervised models display degraded performance when they are evaluated on abusive comments that differ from the training corpus. We investigate if the performance of supervised models for cross-corpora abuse detection can be improved by incorporating additional information from topic models, as the latter can infer the latent topic mixtures from unseen samples. In particular, we combine topical information with representations from a model tuned for classifying abusive comments. Our performance analysis reveals that topic models are able to capture abuse-related topics that can transfer across corpora, and result in improved generalisability.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bose-etal-2021-generalisability">
<titleInfo>
<title>Generalisability of Topic Models in Cross-corpora Abusive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tulika</namePart>
<namePart type="family">Bose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Illina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominique</namePart>
<namePart type="family">Fohr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on NLP for Internet Freedom: Censorship, Disinformation, and Propaganda</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Rapidly changing social media content calls for robust and generalisable abuse detection models. However, the state-of-the-art supervised models display degraded performance when they are evaluated on abusive comments that differ from the training corpus. We investigate if the performance of supervised models for cross-corpora abuse detection can be improved by incorporating additional information from topic models, as the latter can infer the latent topic mixtures from unseen samples. In particular, we combine topical information with representations from a model tuned for classifying abusive comments. Our performance analysis reveals that topic models are able to capture abuse-related topics that can transfer across corpora, and result in improved generalisability.</abstract>
<identifier type="citekey">bose-etal-2021-generalisability</identifier>
<identifier type="doi">10.18653/v1/2021.nlp4if-1.8</identifier>
<location>
<url>https://aclanthology.org/2021.nlp4if-1.8</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>51</start>
<end>56</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Generalisability of Topic Models in Cross-corpora Abusive Language Detection
%A Bose, Tulika
%A Illina, Irina
%A Fohr, Dominique
%S Proceedings of the Fourth Workshop on NLP for Internet Freedom: Censorship, Disinformation, and Propaganda
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F bose-etal-2021-generalisability
%X Rapidly changing social media content calls for robust and generalisable abuse detection models. However, the state-of-the-art supervised models display degraded performance when they are evaluated on abusive comments that differ from the training corpus. We investigate if the performance of supervised models for cross-corpora abuse detection can be improved by incorporating additional information from topic models, as the latter can infer the latent topic mixtures from unseen samples. In particular, we combine topical information with representations from a model tuned for classifying abusive comments. Our performance analysis reveals that topic models are able to capture abuse-related topics that can transfer across corpora, and result in improved generalisability.
%R 10.18653/v1/2021.nlp4if-1.8
%U https://aclanthology.org/2021.nlp4if-1.8
%U https://doi.org/10.18653/v1/2021.nlp4if-1.8
%P 51-56
Markdown (Informal)
[Generalisability of Topic Models in Cross-corpora Abusive Language Detection](https://aclanthology.org/2021.nlp4if-1.8) (Bose et al., NLP4IF 2021)
ACL