@inproceedings{johnson-etal-2026-hope,
title = "{H}ope{\_}{S}peech{\_}{A}lchemists@{D}ravidian{L}ang{T}ech 2026: {TF}-{IDF} {SVM} and {XLM}-{R}o{BERT}a with Focal Loss for Hope Speech Detection in {T}ulu",
author = "Johnson, Joel and
Francis, Meclin A and
Kumari, Jyoti and
Sreekumar, Malavika and
Ulli, Vinay Babu",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Thavareesan, Sajeetha and
Rajiakodi, Saranya and
Navaneethakrishnan, Subalalitha and
Chinnappa, Dhivya and
Palani, Balasubramanian and
Subramanian, Malliga and
Shanmugavadivel, Kogilavani and
Rajalakshmi, Ratnavel",
booktitle = "Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {D}ravidian Languages",
month = jul,
year = "2026",
address = "Underline (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.dravidianlangtech-1.39/",
pages = "263--267",
ISBN = "979-8-89176-401-9",
abstract = "This paper describes our system submitted to the shared task on Hope Speech Detection in Tulu at DravidianLangTech@ACL 2026 [hope-speech-dravidianlangtech-acl-2026]. The task comprises two sub-tasks: coarse-grained classification into four categories (Task 1) and fine-grained classification into five categories (Task 2). We compare a traditional TF-IDF + LinearSVC baseline against XLM-RoBERTa fine-tuned with minority-class oversampling and Focal Loss. Our experiments reveal an interesting trade-off: while the transformer approach achieves the best validation Macro-F1 of 0.57 on the coarse-grained task, the TF-IDF baseline outperforms it on the smaller fine-grained task, highlighting the data scarcity threshold below which large pre-trained models struggle to generalise. On the official test set, our system achieves a Macro-F1 of 0.55 on Task 1 and 0.40 on Task 2. The code is publicly available at: https://github.com/meclin2345/Hope{\_}Speech{\_}Alchemists"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="johnson-etal-2026-hope">
<titleInfo>
<title>Hope_Speech_Alchemists@DravidianLangTech 2026: TF-IDF SVM and XLM-RoBERTa with Focal Loss for Hope Speech Detection in Tulu</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meclin</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Francis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jyoti</namePart>
<namePart type="family">Kumari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malavika</namePart>
<namePart type="family">Sreekumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinay</namePart>
<namePart type="given">Babu</namePart>
<namePart type="family">Ulli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saranya</namePart>
<namePart type="family">Rajiakodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Subalalitha</namePart>
<namePart type="family">Navaneethakrishnan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhivya</namePart>
<namePart type="family">Chinnappa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Balasubramanian</namePart>
<namePart type="family">Palani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malliga</namePart>
<namePart type="family">Subramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kogilavani</namePart>
<namePart type="family">Shanmugavadivel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ratnavel</namePart>
<namePart type="family">Rajalakshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Underline (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-401-9</identifier>
</relatedItem>
<abstract>This paper describes our system submitted to the shared task on Hope Speech Detection in Tulu at DravidianLangTech@ACL 2026 [hope-speech-dravidianlangtech-acl-2026]. The task comprises two sub-tasks: coarse-grained classification into four categories (Task 1) and fine-grained classification into five categories (Task 2). We compare a traditional TF-IDF + LinearSVC baseline against XLM-RoBERTa fine-tuned with minority-class oversampling and Focal Loss. Our experiments reveal an interesting trade-off: while the transformer approach achieves the best validation Macro-F1 of 0.57 on the coarse-grained task, the TF-IDF baseline outperforms it on the smaller fine-grained task, highlighting the data scarcity threshold below which large pre-trained models struggle to generalise. On the official test set, our system achieves a Macro-F1 of 0.55 on Task 1 and 0.40 on Task 2. The code is publicly available at: https://github.com/meclin2345/Hope_Speech_Alchemists</abstract>
<identifier type="citekey">johnson-etal-2026-hope</identifier>
<location>
<url>https://aclanthology.org/2026.dravidianlangtech-1.39/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>263</start>
<end>267</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hope_Speech_Alchemists@DravidianLangTech 2026: TF-IDF SVM and XLM-RoBERTa with Focal Loss for Hope Speech Detection in Tulu
%A Johnson, Joel
%A Francis, Meclin A.
%A Kumari, Jyoti
%A Sreekumar, Malavika
%A Ulli, Vinay Babu
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Rajiakodi, Saranya
%Y Navaneethakrishnan, Subalalitha
%Y Chinnappa, Dhivya
%Y Palani, Balasubramanian
%Y Subramanian, Malliga
%Y Shanmugavadivel, Kogilavani
%Y Rajalakshmi, Ratnavel
%S Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2026
%8 July
%I Association for Computational Linguistics
%C Underline (Virtual)
%@ 979-8-89176-401-9
%F johnson-etal-2026-hope
%X This paper describes our system submitted to the shared task on Hope Speech Detection in Tulu at DravidianLangTech@ACL 2026 [hope-speech-dravidianlangtech-acl-2026]. The task comprises two sub-tasks: coarse-grained classification into four categories (Task 1) and fine-grained classification into five categories (Task 2). We compare a traditional TF-IDF + LinearSVC baseline against XLM-RoBERTa fine-tuned with minority-class oversampling and Focal Loss. Our experiments reveal an interesting trade-off: while the transformer approach achieves the best validation Macro-F1 of 0.57 on the coarse-grained task, the TF-IDF baseline outperforms it on the smaller fine-grained task, highlighting the data scarcity threshold below which large pre-trained models struggle to generalise. On the official test set, our system achieves a Macro-F1 of 0.55 on Task 1 and 0.40 on Task 2. The code is publicly available at: https://github.com/meclin2345/Hope_Speech_Alchemists
%U https://aclanthology.org/2026.dravidianlangtech-1.39/
%P 263-267
Markdown (Informal)
[Hope_Speech_Alchemists@DravidianLangTech 2026: TF-IDF SVM and XLM-RoBERTa with Focal Loss for Hope Speech Detection in Tulu](https://aclanthology.org/2026.dravidianlangtech-1.39/) (Johnson et al., DravidianLangTech 2026)
ACL