% Conference-paper entry; its canonical URL is on aclanthology.org.
% Case-sensitive words in title/booktitle are brace-protected as whole words.
@inproceedings{mahaganapathy-etal-2026-bridging,
  title     = {Bridging Dialectal Variation: A Phonetic Transcription Tool for {Tamil}},
  author    = {Mahaganapathy, Ahrane and
               Karunakaran, Sumirtha and
               Navakulan, Kavitha and
               Sarveswaran, Kengatharaiyer},
  booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.vardial-1.19/},
  pages     = {234--241},
  abstract  = {Phonetic transcription is vital for speech processing and linguistic documentation, particularly in languages like Tamil with complex phonology and dialectal variation. Challenges such as consonant gemination, retroflexion, vowel length, and one-to-many grapheme-phoneme mappings are compounded by limited data on Sri Lankan Tamil dialects. We present a dialect-aware, rule-based transcription tool for Tamil that supports Indian and Jaffna Tamil, with extensions underway for other dialects. Using a two-stage pipeline: Tamil script to Latin, then to IPA with context-sensitive rules, the tool handles dialect shifts. A real-time interface enables dialect selection. Evaluated on a 7,830-word corpus, it achieves 94.54{\%} accuracy for Jaffna Tamil and is higher than other tools like eSpeak NG, advancing linguistic preservation and accessible speech technology for Tamil communities.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; the "host" relatedItem below describes the proceedings volume. -->
  <mods ID="mahaganapathy-etal-2026-bridging">
    <titleInfo>
      <title>Bridging Dialectal Variation: A Phonetic Transcription Tool for Tamil</title>
    </titleInfo>
    <!-- Authors, one name element each, in the order given by the record. -->
    <name type="personal">
      <namePart type="given">Ahrane</namePart>
      <namePart type="family">Mahaganapathy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sumirtha</namePart>
      <namePart type="family">Karunakaran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kavitha</namePart>
      <namePart type="family">Navakulan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kengatharaiyer</namePart>
      <namePart type="family">Sarveswaran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Containing publication: proceedings title, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Phonetic transcription is vital for speech processing and linguistic documentation, particularly in languages like Tamil with complex phonology and dialectal variation. Challenges such as consonant gemination, retroflexion, vowel length, and one-to-many grapheme-phoneme mappings are compounded by limited data on Sri Lankan Tamil dialects. We present a dialect-aware, rule-based transcription tool for Tamil that supports Indian and Jaffna Tamil, with extensions underway for other dialects. Using a two-stage pipeline: Tamil script to Latin, then to IPA with context-sensitive rules, the tool handles dialect shifts. A real-time interface enables dialect selection. Evaluated on a 7,830-word corpus, it achieves 94.54% accuracy for Jaffna Tamil and is higher than other tools like eSpeak NG, advancing linguistic preservation and accessible speech technology for Tamil communities.</abstract>
    <identifier type="citekey">mahaganapathy-etal-2026-bridging</identifier>
    <location>
      <url>https://aclanthology.org/2026.vardial-1.19/</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>234</start>
        <end>241</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging Dialectal Variation: A Phonetic Transcription Tool for Tamil
%A Mahaganapathy, Ahrane
%A Karunakaran, Sumirtha
%A Navakulan, Kavitha
%A Sarveswaran, Kengatharaiyer
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F mahaganapathy-etal-2026-bridging
%X Phonetic transcription is vital for speech processing and linguistic documentation, particularly in languages like Tamil with complex phonology and dialectal variation. Challenges such as consonant gemination, retroflexion, vowel length, and one-to-many grapheme-phoneme mappings are compounded by limited data on Sri Lankan Tamil dialects. We present a dialect-aware, rule-based transcription tool for Tamil that supports Indian and Jaffna Tamil, with extensions underway for other dialects. Using a two-stage pipeline: Tamil script to Latin, then to IPA with context-sensitive rules, the tool handles dialect shifts. A real-time interface enables dialect selection. Evaluated on a 7,830-word corpus, it achieves 94.54% accuracy for Jaffna Tamil and is higher than other tools like eSpeak NG, advancing linguistic preservation and accessible speech technology for Tamil communities.
%U https://aclanthology.org/2026.vardial-1.19/
%P 234-241
Markdown (Informal)
[Bridging Dialectal Variation: A Phonetic Transcription Tool for Tamil](https://aclanthology.org/2026.vardial-1.19/) (Mahaganapathy et al., VarDial 2026)
ACL