@inproceedings{jahan-etal-2022-banglahatebert,
    title = "{B}angla{H}ate{BERT}: {BERT} for Abusive Language Detection in {B}engali",
    author = "Jahan, Md Saroar and
      Haque, Mainul and
      Arhab, Nabil and
      Oussalah, Mourad",
    editor = "Monti, Johanna and
      Basile, Valerio and
      Di Buono, Maria Pia and
      Manna, Raffaele and
      Pascucci, Antonio and
      Tonelli, Sara",
    booktitle = "Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis",
    month = jun,
    year = "2022",
    address = "Marseille, France",
    publisher = "European Language Resources Association",
    url = "https://aclanthology.org/2022.restup-1.2",
    pages = "8--15",
    abstract = "This paper introduces BanglaHateBERT, a retrained BERT model for abusive language detection in Bengali. The model was trained on a large-scale Bengali offensive, abusive, and hateful corpus that we collected from different sources and have made available to the public. Furthermore, we collected and manually annotated a balanced 15K Bengali hate speech dataset and made it publicly available to the research community. We took the existing pre-trained BanglaBERT model and retrained it on 1.5 million offensive posts. We present a detailed comparison between the generic pre-trained language model and its abuse-inclined retrained version. On all datasets, BanglaHateBERT outperformed the corresponding available BERT model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="jahan-etal-2022-banglahatebert">
    <titleInfo>
      <title>BanglaHateBERT: BERT for Abusive Language Detection in Bengali</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Md</namePart>
      <namePart type="given">Saroar</namePart>
      <namePart type="family">Jahan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mainul</namePart>
      <namePart type="family">Haque</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nabil</namePart>
      <namePart type="family">Arhab</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mourad</namePart>
      <namePart type="family">Oussalah</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Johanna</namePart>
        <namePart type="family">Monti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Valerio</namePart>
        <namePart type="family">Basile</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="given">Pia</namePart>
        <namePart type="family">Di Buono</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Raffaele</namePart>
        <namePart type="family">Manna</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antonio</namePart>
        <namePart type="family">Pascucci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Tonelli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper introduces BanglaHateBERT, a retrained BERT model for abusive language detection in Bengali. The model was trained on a large-scale Bengali offensive, abusive, and hateful corpus that we collected from different sources and have made available to the public. Furthermore, we collected and manually annotated a balanced 15K Bengali hate speech dataset and made it publicly available to the research community. We took the existing pre-trained BanglaBERT model and retrained it on 1.5 million offensive posts. We present a detailed comparison between the generic pre-trained language model and its abuse-inclined retrained version. On all datasets, BanglaHateBERT outperformed the corresponding available BERT model.</abstract>
    <identifier type="citekey">jahan-etal-2022-banglahatebert</identifier>
    <location>
      <url>https://aclanthology.org/2022.restup-1.2</url>
    </location>
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>8</start>
        <end>15</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BanglaHateBERT: BERT for Abusive Language Detection in Bengali
%A Jahan, Md Saroar
%A Haque, Mainul
%A Arhab, Nabil
%A Oussalah, Mourad
%Y Monti, Johanna
%Y Basile, Valerio
%Y Di Buono, Maria Pia
%Y Manna, Raffaele
%Y Pascucci, Antonio
%Y Tonelli, Sara
%S Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F jahan-etal-2022-banglahatebert
%X This paper introduces BanglaHateBERT, a retrained BERT model for abusive language detection in Bengali. The model was trained on a large-scale Bengali offensive, abusive, and hateful corpus that we collected from different sources and have made available to the public. Furthermore, we collected and manually annotated a balanced 15K Bengali hate speech dataset and made it publicly available to the research community. We took the existing pre-trained BanglaBERT model and retrained it on 1.5 million offensive posts. We present a detailed comparison between the generic pre-trained language model and its abuse-inclined retrained version. On all datasets, BanglaHateBERT outperformed the corresponding available BERT model.
%U https://aclanthology.org/2022.restup-1.2
%P 8-15
Markdown (Informal)
[BanglaHateBERT: BERT for Abusive Language Detection in Bengali](https://aclanthology.org/2022.restup-1.2) (Jahan et al., ResTUP 2022)
ACL
Md Saroar Jahan, Mainul Haque, Nabil Arhab, and Mourad Oussalah. 2022. BanglaHateBERT: BERT for Abusive Language Detection in Bengali. In Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis, pages 8–15, Marseille, France. European Language Resources Association.
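
As a rough illustration of the retraining step the abstract describes (continued masked-language-model pretraining of a BanglaBERT checkpoint on an offensive-post corpus, before any hate speech fine-tuning), here is a minimal sketch using the Hugging Face transformers and datasets libraries. The base checkpoint id, corpus file name, and hyperparameters below are assumptions for illustration, not values confirmed by the paper.

# Minimal sketch: domain-adaptive MLM retraining of a BanglaBERT checkpoint.
# Assumptions: the base model id may differ from the paper's exact checkpoint,
# and "offensive_posts.txt" (one post per line) is a placeholder file name
# standing in for the 1.5M offensive posts mentioned in the abstract.
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset

BASE = "sagorsarker/bangla-bert-base"  # one public BanglaBERT; assumed base
tokenizer = AutoTokenizer.from_pretrained(BASE)
model = AutoModelForMaskedLM.from_pretrained(BASE)

# Load and tokenize the raw offensive-post corpus.
corpus = load_dataset("text", data_files={"train": "offensive_posts.txt"})["train"]
corpus = corpus.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=128),
    batched=True,
    remove_columns=["text"],
)

# Standard BERT-style masked-language-model objective with 15% random masking.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="banglahatebert-retrain",  # illustrative output path
        per_device_train_batch_size=16,       # illustrative hyperparameters
        num_train_epochs=1,
    ),
    train_dataset=corpus,
    data_collator=collator,
)
trainer.train()

The resulting checkpoint would then be fine-tuned as an ordinary sequence classifier on the labeled hate speech data, which is the generic-versus-retrained comparison the abstract reports.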