% Shared-task system paper from the Sixth Workshop on Speech, Vision, and
% Language Technologies for Dravidian Languages (2026); pages 181 to 185.
% Case-sensitive words in title/booktitle are protected with whole-word braces
% so sentence-casing styles keep them intact.
@inproceedings{karunanidhi-arumugam-2026-chmod-777,
  title     = "{CHMOD\_777}@{DravidianLangTech} 2026: {LLM} Augmented Transformer Fine-tuning for {Tamil} Political Sentiment Analysis",
  author    = "Karunanidhi, Arunaggiri Pandian and
               Arumugam, Prabalakshmi",
  editor    = "Chakravarthi, Bharathi Raja and
               Priyadharshini, Ruba and
               Madasamy, Anand Kumar and
               Thavareesan, Sajeetha and
               Rajiakodi, Saranya and
               Navaneethakrishnan, Subalalitha and
               Chinnappa, Dhivya and
               Palani, Balasubramanian and
               Subramanian, Malliga and
               Shanmugavadivel, Kogilavani and
               Rajalakshmi, Ratnavel",
  booktitle = "Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {Dravidian} Languages",
  month     = jul,
  year      = "2026",
  address   = "Underline (Virtual)",
  publisher = "Association for Computational Linguistics",
  url       = "https://aclanthology.org/2026.dravidianlangtech-1.23/",
  pages     = "181--185",
  isbn      = "979-8-89176-401-9",
  abstract  = "This paper describes Team CHMOD{\_}777{'}s system for the DravidianLangTech@ACL 2026 shared task on political multiclass sentiment analysis of Tamil Twitter comments. The task requires classifying Tamil political tweets into seven sentiment categories under severe class imbalance (8:1 ratio). We address this challenge through LLM-based data augmentation using Gemini 2.5 Flash, expanding training data from 4,352 to 15,316 samples (3.5x the original). Our best system, MuRIL fine-tuned on augmented data with Focal Loss (gamma=3.0) and weighted sampling, achieves 35.79{\%} Macro F1 on the development set, a 67{\%} relative improvement over the non-augmented baseline. On the official test set, our system achieves 34.25{\%} Macro F1, ranking 12th out of 22 participating teams. We find that (1) language-specific pre-training (MuRIL, 236M) outperforms larger general models (IndicBERT-v3, 1B), (2) smaller models benefit disproportionately from augmentation, and (3) Substantiated is the hardest category (F1=10.7{\%}) due to its requirement for factual reasoning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; the ID equals the citekey identifier further down. -->
  <mods ID="karunanidhi-arumugam-2026-chmod-777">
    <titleInfo>
      <title>CHMOD_777@DravidianLangTech 2026: LLM Augmented Transformer Fine-tuning for Tamil Political Sentiment Analysis</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Arunaggiri</namePart>
      <namePart type="given">Pandian</namePart>
      <namePart type="family">Karunanidhi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Prabalakshmi</namePart>
      <namePart type="family">Arumugam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruba</namePart>
        <namePart type="family">Priyadharshini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anand</namePart>
        <namePart type="given">Kumar</namePart>
        <namePart type="family">Madasamy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sajeetha</namePart>
        <namePart type="family">Thavareesan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Saranya</namePart>
        <namePart type="family">Rajiakodi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Subalalitha</namePart>
        <namePart type="family">Navaneethakrishnan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dhivya</namePart>
        <namePart type="family">Chinnappa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Balasubramanian</namePart>
        <namePart type="family">Palani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Malliga</namePart>
        <namePart type="family">Subramanian</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kogilavani</namePart>
        <namePart type="family">Shanmugavadivel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ratnavel</namePart>
        <namePart type="family">Rajalakshmi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Underline (Virtual)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-401-9</identifier>
    </relatedItem>
    <abstract>This paper describes Team CHMOD_777’s system for the DravidianLangTech@ACL 2026 shared task on political multiclass sentiment analysis of Tamil Twitter comments. The task requires classifying Tamil political tweets into seven sentiment categories under severe class imbalance (8:1 ratio). We address this challenge through LLM-based data augmentation using Gemini 2.5 Flash, expanding training data from 4,352 to 15,316 samples (3.5x the original). Our best system, MuRIL fine-tuned on augmented data with Focal Loss (gamma=3.0) and weighted sampling, achieves 35.79% Macro F1 on the development set, a 67% relative improvement over the non-augmented baseline. On the official test set, our system achieves 34.25% Macro F1, ranking 12th out of 22 participating teams. We find that (1) language-specific pre-training (MuRIL, 236M) outperforms larger general models (IndicBERT-v3, 1B), (2) smaller models benefit disproportionately from augmentation, and (3) Substantiated is the hardest category (F1=10.7%) due to its requirement for factual reasoning.</abstract>
    <identifier type="citekey">karunanidhi-arumugam-2026-chmod-777</identifier>
    <location>
      <url>https://aclanthology.org/2026.dravidianlangtech-1.23/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>181</start>
        <end>185</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CHMOD_777@DravidianLangTech 2026: LLM Augmented Transformer Fine-tuning for Tamil Political Sentiment Analysis
%A Karunanidhi, Arunaggiri Pandian
%A Arumugam, Prabalakshmi
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Rajiakodi, Saranya
%Y Navaneethakrishnan, Subalalitha
%Y Chinnappa, Dhivya
%Y Palani, Balasubramanian
%Y Subramanian, Malliga
%Y Shanmugavadivel, Kogilavani
%Y Rajalakshmi, Ratnavel
%S Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2026
%8 July
%I Association for Computational Linguistics
%C Underline (Virtual)
%@ 979-8-89176-401-9
%F karunanidhi-arumugam-2026-chmod-777
%X This paper describes Team CHMOD_777’s system for the DravidianLangTech@ACL 2026 shared task on political multiclass sentiment analysis of Tamil Twitter comments. The task requires classifying Tamil political tweets into seven sentiment categories under severe class imbalance (8:1 ratio). We address this challenge through LLM-based data augmentation using Gemini 2.5 Flash, expanding training data from 4,352 to 15,316 samples (3.5x the original). Our best system, MuRIL fine-tuned on augmented data with Focal Loss (gamma=3.0) and weighted sampling, achieves 35.79% Macro F1 on the development set, a 67% relative improvement over the non-augmented baseline. On the official test set, our system achieves 34.25% Macro F1, ranking 12th out of 22 participating teams. We find that (1) language-specific pre-training (MuRIL, 236M) outperforms larger general models (IndicBERT-v3, 1B), (2) smaller models benefit disproportionately from augmentation, and (3) Substantiated is the hardest category (F1=10.7%) due to its requirement for factual reasoning.
%U https://aclanthology.org/2026.dravidianlangtech-1.23/
%P 181-185
Markdown (Informal)
[CHMOD_777@DravidianLangTech 2026: LLM Augmented Transformer Fine-tuning for Tamil Political Sentiment Analysis](https://aclanthology.org/2026.dravidianlangtech-1.23/) (Karunanidhi & Arumugam, DravidianLangTech 2026)
ACL