@inproceedings{sazzed-2021-lexicon,
title = "A Lexicon for Profane and Obscene Text Identification in {B}engali",
author = "Sazzed, Salim",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.145",
pages = "1289--1296",
abstract = "Bengali is a low-resource language that lacks tools and resources for profane and obscene textual content detection. Until now, no lexicon exists for detecting obscenity in Bengali social media text. This study introduces a Bengali obscene lexicon consisting of over 200 Bengali terms, which can be considered filthy, slang, profane or obscene. A semi-automatic methodology is presented for developing the profane lexicon that leverages an obscene corpus, word embedding, and part-of-speech (POS) taggers. The developed lexicon achieves coverage of around 0.85 for obscene and profane content detection in an evaluation dataset. The experimental results imply that the developed lexicon is effective at identifying obscenity in Bengali social media content.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sazzed-2021-lexicon">
<titleInfo>
<title>A Lexicon for Profane and Obscene Text Identification in Bengali</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salim</namePart>
<namePart type="family">Sazzed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Bengali is a low-resource language that lacks tools and resources for profane and obscene textual content detection. Until now, no lexicon exists for detecting obscenity in Bengali social media text. This study introduces a Bengali obscene lexicon consisting of over 200 Bengali terms, which can be considered filthy, slang, profane or obscene. A semi-automatic methodology is presented for developing the profane lexicon that leverages an obscene corpus, word embedding, and part-of-speech (POS) taggers. The developed lexicon achieves coverage of around 0.85 for obscene and profane content detection in an evaluation dataset. The experimental results imply that the developed lexicon is effective at identifying obscenity in Bengali social media content.</abstract>
<identifier type="citekey">sazzed-2021-lexicon</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.145</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1289</start>
<end>1296</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Lexicon for Profane and Obscene Text Identification in Bengali
%A Sazzed, Salim
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F sazzed-2021-lexicon
%X Bengali is a low-resource language that lacks tools and resources for profane and obscene textual content detection. Until now, no lexicon exists for detecting obscenity in Bengali social media text. This study introduces a Bengali obscene lexicon consisting of over 200 Bengali terms, which can be considered filthy, slang, profane or obscene. A semi-automatic methodology is presented for developing the profane lexicon that leverages an obscene corpus, word embedding, and part-of-speech (POS) taggers. The developed lexicon achieves coverage of around 0.85 for obscene and profane content detection in an evaluation dataset. The experimental results imply that the developed lexicon is effective at identifying obscenity in Bengali social media content.
%U https://aclanthology.org/2021.ranlp-1.145
%P 1289-1296
Markdown (Informal)
[A Lexicon for Profane and Obscene Text Identification in Bengali](https://aclanthology.org/2021.ranlp-1.145) (Sazzed, RANLP 2021)
ACL