@inproceedings{abhinav-etal-2026-dlrg,
title = "{DLRG}@{D}ravidian{L}ang{T}ech 2026: Dual-Purpose Whisper Adaptation for {T}amil Dialect Identification and Dialectal Speech Recognition",
author = "Abhinav, Gulisetty and
Nanda, Tanisha and
R, Ramesh Kannan and
Rajalakshmi, Ratnavel",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Thavareesan, Sajeetha and
Rajiakodi, Saranya and
Navaneethakrishnan, Subalalitha and
Chinnappa, Dhivya and
Palani, Balasubramanian and
Subramanian, Malliga and
Shanmugavadivel, Kogilavani and
Rajalakshmi, Ratnavel",
booktitle = "Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for {D}ravidian Languages",
month = jul,
year = "2026",
address = "Underline (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.dravidianlangtech-1.33/",
pages = "232--236",
ISBN = "979-8-89176-401-9",
abstract = "This paper describes our system developed for the shared task on Dialect Based Speech Recognition and Classification in Tamil at DravidianLangTech@ACL 2026. We participated in both Subtask{~}1 (Dialect Identification) and Subtask{~}2 (Dialectal ASR). Our approach leverages a single Tamil-adapted Whisper Medium model as a unified foundation for both tasks. For dialect classification, we have used the Whisper encoder as a feature extractor by discarding the decoder, applying mean pooling over the temporal dimension, and fine-tuning the full encoder with a lightweight classification head, achieving 73.4{\%} accuracy on the test set. For dialectal ASR, we apply Low-Rank Adaptation (LoRA) to the full encoder-decoder architecture with SpecAugment-based data augmentation, achieving a Word Error Rate (WER) of 0.55 on the test set. Our experiments reveal that unfreezing the pre-trained encoder is critical for dialect discrimination, boosting accuracy from 52.78{\%} (frozen) to 73.4{\%} (unfrozen). The code is publicly available at https://github.com/DLRG-VIT/DravidianLangTech2026"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abhinav-etal-2026-dlrg">
<titleInfo>
<title>DLRG@DravidianLangTech 2026: Dual-Purpose Whisper Adaptation for Tamil Dialect Identification and Dialectal Speech Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gulisetty</namePart>
<namePart type="family">Abhinav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanisha</namePart>
<namePart type="family">Nanda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ramesh</namePart>
<namePart type="given">Kannan</namePart>
<namePart type="family">R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ratnavel</namePart>
<namePart type="family">Rajalakshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saranya</namePart>
<namePart type="family">Rajiakodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Subalalitha</namePart>
<namePart type="family">Navaneethakrishnan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhivya</namePart>
<namePart type="family">Chinnappa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Balasubramanian</namePart>
<namePart type="family">Palani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malliga</namePart>
<namePart type="family">Subramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kogilavani</namePart>
<namePart type="family">Shanmugavadivel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ratnavel</namePart>
<namePart type="family">Rajalakshmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Underline (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-401-9</identifier>
</relatedItem>
<abstract>This paper describes our system developed for the shared task on Dialect Based Speech Recognition and Classification in Tamil at DravidianLangTech@ACL 2026. We participated in both Subtask 1 (Dialect Identification) and Subtask 2 (Dialectal ASR). Our approach leverages a single Tamil-adapted Whisper Medium model as a unified foundation for both tasks. For dialect classification, we have used the Whisper encoder as a feature extractor by discarding the decoder, applying mean pooling over the temporal dimension, and fine-tuning the full encoder with a lightweight classification head, achieving 73.4% accuracy on the test set. For dialectal ASR, we apply Low-Rank Adaptation (LoRA) to the full encoder-decoder architecture with SpecAugment-based data augmentation, achieving a Word Error Rate (WER) of 0.55 on the test set. Our experiments reveal that unfreezing the pre-trained encoder is critical for dialect discrimination, boosting accuracy from 52.78% (frozen) to 73.4% (unfrozen). The code is publicly available at https://github.com/DLRG-VIT/DravidianLangTech2026</abstract>
<identifier type="citekey">abhinav-etal-2026-dlrg</identifier>
<location>
<url>https://aclanthology.org/2026.dravidianlangtech-1.33/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>232</start>
<end>236</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DLRG@DravidianLangTech 2026: Dual-Purpose Whisper Adaptation for Tamil Dialect Identification and Dialectal Speech Recognition
%A Abhinav, Gulisetty
%A Nanda, Tanisha
%A R, Ramesh Kannan
%A Rajalakshmi, Ratnavel
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Rajiakodi, Saranya
%Y Navaneethakrishnan, Subalalitha
%Y Chinnappa, Dhivya
%Y Palani, Balasubramanian
%Y Subramanian, Malliga
%Y Shanmugavadivel, Kogilavani
%Y Rajalakshmi, Ratnavel
%S Proceedings of the Sixth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2026
%8 July
%I Association for Computational Linguistics
%C Underline (Virtual)
%@ 979-8-89176-401-9
%F abhinav-etal-2026-dlrg
%X This paper describes our system developed for the shared task on Dialect Based Speech Recognition and Classification in Tamil at DravidianLangTech@ACL 2026. We participated in both Subtask 1 (Dialect Identification) and Subtask 2 (Dialectal ASR). Our approach leverages a single Tamil-adapted Whisper Medium model as a unified foundation for both tasks. For dialect classification, we have used the Whisper encoder as a feature extractor by discarding the decoder, applying mean pooling over the temporal dimension, and fine-tuning the full encoder with a lightweight classification head, achieving 73.4% accuracy on the test set. For dialectal ASR, we apply Low-Rank Adaptation (LoRA) to the full encoder-decoder architecture with SpecAugment-based data augmentation, achieving a Word Error Rate (WER) of 0.55 on the test set. Our experiments reveal that unfreezing the pre-trained encoder is critical for dialect discrimination, boosting accuracy from 52.78% (frozen) to 73.4% (unfrozen). The code is publicly available at https://github.com/DLRG-VIT/DravidianLangTech2026
%U https://aclanthology.org/2026.dravidianlangtech-1.33/
%P 232-236
Markdown (Informal)
[DLRG@DravidianLangTech 2026: Dual-Purpose Whisper Adaptation for Tamil Dialect Identification and Dialectal Speech Recognition](https://aclanthology.org/2026.dravidianlangtech-1.33/) (Abhinav et al., DravidianLangTech 2026)
ACL