@inproceedings{fahim-2023-aambela,
title = "Aambela at {BLP}-2023 Task 1: Focus on {UNK} tokens: Analyzing Violence Inciting {B}angla Text with Adding Dataset Specific New Word Tokens",
author = "Fahim, Md",
editor = "Alam, Firoj and
Kar, Sudipta and
Chowdhury, Shammur Absar and
Sadeque, Farig and
Amin, Ruhul",
booktitle = "Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.banglalp-1.24",
doi = "10.18653/v1/2023.banglalp-1.24",
pages = "201--207",
abstract = "The BLP-2023 Task 1 aims to develop a Natural Language Inference system tailored for detecting and analyzing threats from Bangla YouTube comments. Bangla language models like BanglaBERT have demonstrated remarkable performance in various Bangla natural language processing tasks across different domains. We utilized BanglaBERT for the violence detection task, employing three different classification heads. As BanglaBERT{'}s vocabulary lacks certain crucial words, our model incorporates some of them as new special tokens, based on their frequency in the dataset, and their embeddings are learned during training. The model achieved the 2nd position on the leaderboard, boasting an impressive macro-F1 Score of 76.04{\%} on the official test set. With the addition of new tokens, we achieved a 76.90{\%} macro-F1 score, surpassing the top score (76.044{\%}) on the test set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fahim-2023-aambela">
<titleInfo>
<title>Aambela at BLP-2023 Task 1: Focus on UNK tokens: Analyzing Violence Inciting Bangla Text with Adding Dataset Specific New Word Tokens</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="family">Fahim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farig</namePart>
<namePart type="family">Sadeque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruhul</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The BLP-2023 Task 1 aims to develop a Natural Language Inference system tailored for detecting and analyzing threats from Bangla YouTube comments. Bangla language models like BanglaBERT have demonstrated remarkable performance in various Bangla natural language processing tasks across different domains. We utilized BanglaBERT for the violence detection task, employing three different classification heads. As BanglaBERT’s vocabulary lacks certain crucial words, our model incorporates some of them as new special tokens, based on their frequency in the dataset, and their embeddings are learned during training. The model achieved the 2nd position on the leaderboard, boasting an impressive macro-F1 Score of 76.04% on the official test set. With the addition of new tokens, we achieved a 76.90% macro-F1 score, surpassing the top score (76.044%) on the test set.</abstract>
<identifier type="citekey">fahim-2023-aambela</identifier>
<identifier type="doi">10.18653/v1/2023.banglalp-1.24</identifier>
<location>
<url>https://aclanthology.org/2023.banglalp-1.24</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>201</start>
<end>207</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Aambela at BLP-2023 Task 1: Focus on UNK tokens: Analyzing Violence Inciting Bangla Text with Adding Dataset Specific New Word Tokens
%A Fahim, Md
%Y Alam, Firoj
%Y Kar, Sudipta
%Y Chowdhury, Shammur Absar
%Y Sadeque, Farig
%Y Amin, Ruhul
%S Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F fahim-2023-aambela
%X The BLP-2023 Task 1 aims to develop a Natural Language Inference system tailored for detecting and analyzing threats from Bangla YouTube comments. Bangla language models like BanglaBERT have demonstrated remarkable performance in various Bangla natural language processing tasks across different domains. We utilized BanglaBERT for the violence detection task, employing three different classification heads. As BanglaBERT’s vocabulary lacks certain crucial words, our model incorporates some of them as new special tokens, based on their frequency in the dataset, and their embeddings are learned during training. The model achieved the 2nd position on the leaderboard, boasting an impressive macro-F1 Score of 76.04% on the official test set. With the addition of new tokens, we achieved a 76.90% macro-F1 score, surpassing the top score (76.044%) on the test set.
%R 10.18653/v1/2023.banglalp-1.24
%U https://aclanthology.org/2023.banglalp-1.24
%U https://doi.org/10.18653/v1/2023.banglalp-1.24
%P 201-207
Markdown (Informal)
[Aambela at BLP-2023 Task 1: Focus on UNK tokens: Analyzing Violence Inciting Bangla Text with Adding Dataset Specific New Word Tokens](https://aclanthology.org/2023.banglalp-1.24) (Fahim, BanglaLP 2023)
ACL