@inproceedings{mandravickaite-etal-2025-exploring,
title = "Exploring Hate Speech Detection Models for {L}ithuanian Language",
author = "Mandravickait{\.{e}}, Justina and
Rimkien{\.{e}}, Egl{\.{e}} and
Petkevi{\v{c}}ius, Mindaugas and
Songailait{\.{e}}, Milita and
Zaranka, Eimantas and
Krilavi{\v{c}}ius, Tomas",
editor = "Calabrese, Agostina and
de Kock, Christine and
Nozza, Debora and
Plaza-del-Arco, Flor Miriam and
Talat, Zeerak and
Vargas, Francielle",
booktitle = "Proceedings of the 9th Workshop on Online Abuse and Harms (WOAH)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.woah-1.18/",
pages = "206--218",
isbn = "979-8-89176-105-6",
abstract = "Online hate speech poses a significant challenge, as it can incite violence and contribute to social polarization. This study evaluates traditional machine learning, deep learning and large language models (LLMs) for Lithuanian hate speech detection, addressing class imbalance issue via data augmentation and resampling techniques. Our dataset included 27,358 user-generated comments, annotated into Neutral language (56{\%}), Offensive language (29{\%}) and Hate speech (15{\%}). We trained BiLSTM, LSTM, CNN, SVM, and Random Forest models and fine-tuned Multilingual BERT, LitLat BERT, Electra, RWKV, ChatGPT, LT-Llama-2, and Gemma-2 models. Additionally, we pre-trained Electra for Lithuanian. Models were evaluated using accuracy and weighted F1-score. On the imbalanced dataset, LitLat BERT (0.76 weighted F1-score) and Multilingual BERT (0.73 weighted F1-score) performed best. Over-sampling further boosted weighted F1-scores, with Multilingual BERT (0.85) and LitLat BERT (0.84) outperforming other models. Over-sampling combined with augmentation provided the best overall results. Under-sampling led to performance declines and was less effective. Finally, fine-tuning LLMs improved their accuracy which highlighted the importance of fine-tuning for more specialized NLP tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mandravickaite-etal-2025-exploring">
<titleInfo>
<title>Exploring Hate Speech Detection Models for Lithuanian Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Justina</namePart>
<namePart type="family">Mandravickaitė</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eglė</namePart>
<namePart type="family">Rimkienė</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mindaugas</namePart>
<namePart type="family">Petkevičius</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milita</namePart>
<namePart type="family">Songailaitė</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eimantas</namePart>
<namePart type="family">Zaranka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomas</namePart>
<namePart type="family">Krilavičius</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Online Abuse and Harms (WOAH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agostina</namePart>
<namePart type="family">Calabrese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">de Kock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flor</namePart>
<namePart type="given">Miriam</namePart>
<namePart type="family">Plaza-del-Arco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeerak</namePart>
<namePart type="family">Talat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francielle</namePart>
<namePart type="family">Vargas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-105-6</identifier>
</relatedItem>
<abstract>Online hate speech poses a significant challenge, as it can incite violence and contribute to social polarization. This study evaluates traditional machine learning, deep learning and large language models (LLMs) for Lithuanian hate speech detection, addressing class imbalance issue via data augmentation and resampling techniques. Our dataset included 27,358 user-generated comments, annotated into Neutral language (56%), Offensive language (29%) and Hate speech (15%). We trained BiLSTM, LSTM, CNN, SVM, and Random Forest models and fine-tuned Multilingual BERT, LitLat BERT, Electra, RWKV, ChatGPT, LT-Llama-2, and Gemma-2 models. Additionally, we pre-trained Electra for Lithuanian. Models were evaluated using accuracy and weighted F1-score. On the imbalanced dataset, LitLat BERT (0.76 weighted F1-score) and Multilingual BERT (0.73 weighted F1-score) performed best. Over-sampling further boosted weighted F1-scores, with Multilingual BERT (0.85) and LitLat BERT (0.84) outperforming other models. Over-sampling combined with augmentation provided the best overall results. Under-sampling led to performance declines and was less effective. Finally, fine-tuning LLMs improved their accuracy which highlighted the importance of fine-tuning for more specialized NLP tasks.</abstract>
<identifier type="citekey">mandravickaite-etal-2025-exploring</identifier>
<location>
<url>https://aclanthology.org/2025.woah-1.18/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>206</start>
<end>218</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Hate Speech Detection Models for Lithuanian Language
%A Mandravickaitė, Justina
%A Rimkienė, Eglė
%A Petkevičius, Mindaugas
%A Songailaitė, Milita
%A Zaranka, Eimantas
%A Krilavičius, Tomas
%Y Calabrese, Agostina
%Y de Kock, Christine
%Y Nozza, Debora
%Y Plaza-del-Arco, Flor Miriam
%Y Talat, Zeerak
%Y Vargas, Francielle
%S Proceedings of the 9th Workshop on Online Abuse and Harms (WOAH)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-105-6
%F mandravickaite-etal-2025-exploring
%X Online hate speech poses a significant challenge, as it can incite violence and contribute to social polarization. This study evaluates traditional machine learning, deep learning and large language models (LLMs) for Lithuanian hate speech detection, addressing class imbalance issue via data augmentation and resampling techniques. Our dataset included 27,358 user-generated comments, annotated into Neutral language (56%), Offensive language (29%) and Hate speech (15%). We trained BiLSTM, LSTM, CNN, SVM, and Random Forest models and fine-tuned Multilingual BERT, LitLat BERT, Electra, RWKV, ChatGPT, LT-Llama-2, and Gemma-2 models. Additionally, we pre-trained Electra for Lithuanian. Models were evaluated using accuracy and weighted F1-score. On the imbalanced dataset, LitLat BERT (0.76 weighted F1-score) and Multilingual BERT (0.73 weighted F1-score) performed best. Over-sampling further boosted weighted F1-scores, with Multilingual BERT (0.85) and LitLat BERT (0.84) outperforming other models. Over-sampling combined with augmentation provided the best overall results. Under-sampling led to performance declines and was less effective. Finally, fine-tuning LLMs improved their accuracy which highlighted the importance of fine-tuning for more specialized NLP tasks.
%U https://aclanthology.org/2025.woah-1.18/
%P 206-218
Markdown (Informal)
[Exploring Hate Speech Detection Models for Lithuanian Language](https://aclanthology.org/2025.woah-1.18/) (Mandravickaitė et al., WOAH 2025)
ACL
- Justina Mandravickaitė, Eglė Rimkienė, Mindaugas Petkevičius, Milita Songailaitė, Eimantas Zaranka, and Tomas Krilavičius. 2025. Exploring Hate Speech Detection Models for Lithuanian Language. In Proceedings of the 9th Workshop on Online Abuse and Harms (WOAH), pages 206–218, Vienna, Austria. Association for Computational Linguistics.