@inproceedings{manukonda-kodali-2024-bytellm,
title = "byte{LLM}@{LT}-{EDI}-2024: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with {S}ubword2{V}ec and {B}i{LSTM}",
author = "Manukonda, Durga and
Kodali, Rohith",
editor = {Chakravarthi, Bharathi Raja and
B, Bharathi and
Buitelaar, Paul and
Durairaj, Thenmozhi and
Kov{\'a}cs, Gy{\"o}rgy and
Garc{\'\i}a Cumbreras, Miguel {\'A}ngel},
booktitle = "Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.ltedi-1.16",
pages = "157--163",
abstract = "This research focuses on Homophobia and Transphobia Detection in Dravidian languages, specifically Telugu, Kannada, Tamil, and Malayalam. Leveraging the Homophobia/ Transphobia Detection dataset, we propose an innovative approach employing a custom-designed tokenizer with a Bidirectional Long Short-Term Memory (BiLSTM) architecture. Our distinctive contribution lies in a tokenizer that reduces model sizes to below 7MB, improving efficiency and addressing real-time deployment challenges. The BiLSTM implementation demonstrates significant enhancements in hate speech detection accuracy, effectively capturing linguistic nuances. Low-size models efficiently alleviate inference challenges, ensuring swift real-time detection and practical deployment. This work pioneers a framework for hate speech detection, providing insights into model size, inference speed, and real-time deployment challenges in combatting online hate speech within Dravidian languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="manukonda-kodali-2024-bytellm">
<titleInfo>
<title>byteLLM@LT-EDI-2024: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with Subword2Vec and BiLSTM</title>
</titleInfo>
<name type="personal">
<namePart type="given">Durga</namePart>
<namePart type="family">Manukonda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rohith</namePart>
<namePart type="family">Kodali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thenmozhi</namePart>
<namePart type="family">Durairaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">György</namePart>
<namePart type="family">Kovács</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="given">Ángel</namePart>
<namePart type="family">García Cumbreras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This research focuses on Homophobia and Transphobia Detection in Dravidian languages, specifically Telugu, Kannada, Tamil, and Malayalam. Leveraging the Homophobia/ Transphobia Detection dataset, we propose an innovative approach employing a custom-designed tokenizer with a Bidirectional Long Short-Term Memory (BiLSTM) architecture. Our distinctive contribution lies in a tokenizer that reduces model sizes to below 7MB, improving efficiency and addressing real-time deployment challenges. The BiLSTM implementation demonstrates significant enhancements in hate speech detection accuracy, effectively capturing linguistic nuances. Low-size models efficiently alleviate inference challenges, ensuring swift real-time detection and practical deployment. This work pioneers a framework for hate speech detection, providing insights into model size, inference speed, and real-time deployment challenges in combatting online hate speech within Dravidian languages.</abstract>
<identifier type="citekey">manukonda-kodali-2024-bytellm</identifier>
<location>
<url>https://aclanthology.org/2024.ltedi-1.16</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>157</start>
<end>163</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T byteLLM@LT-EDI-2024: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with Subword2Vec and BiLSTM
%A Manukonda, Durga
%A Kodali, Rohith
%Y Chakravarthi, Bharathi Raja
%Y B, Bharathi
%Y Buitelaar, Paul
%Y Durairaj, Thenmozhi
%Y Kovács, György
%Y García Cumbreras, Miguel Ángel
%S Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F manukonda-kodali-2024-bytellm
%X This research focuses on Homophobia and Transphobia Detection in Dravidian languages, specifically Telugu, Kannada, Tamil, and Malayalam. Leveraging the Homophobia/ Transphobia Detection dataset, we propose an innovative approach employing a custom-designed tokenizer with a Bidirectional Long Short-Term Memory (BiLSTM) architecture. Our distinctive contribution lies in a tokenizer that reduces model sizes to below 7MB, improving efficiency and addressing real-time deployment challenges. The BiLSTM implementation demonstrates significant enhancements in hate speech detection accuracy, effectively capturing linguistic nuances. Low-size models efficiently alleviate inference challenges, ensuring swift real-time detection and practical deployment. This work pioneers a framework for hate speech detection, providing insights into model size, inference speed, and real-time deployment challenges in combatting online hate speech within Dravidian languages.
%U https://aclanthology.org/2024.ltedi-1.16
%P 157-163
Markdown (Informal)
[byteLLM@LT-EDI-2024: Homophobia/Transphobia Detection in Social Media Comments - Custom Subword Tokenization with Subword2Vec and BiLSTM](https://aclanthology.org/2024.ltedi-1.16) (Manukonda & Kodali, LTEDI-WS 2024)
ACL