% BibTeX record for the WAT 2025 workshop paper (aclanthology.org/2025.wat-1.5).
% Proper nouns and acronyms are braced as whole words so sentence-casing styles keep them.
@inproceedings{sood-etal-2025-speech,
  title     = {Speech-to-Speech Machine Translation for Dialectal Variations of {Hindi}},
  author    = {Sood, Sanmay and
               Rajput, Siddharth and
               Akhtar, Md Shad},
  editor    = {Nakazawa, Toshiaki and
               Goto, Isao},
  booktitle = {Proceedings of the Twelfth Workshop on {Asian} Translation ({WAT} 2025)},
  month     = dec,
  year      = {2025},
  address   = {Mumbai, India},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.wat-1.5/},
  pages     = {54--65},
  isbn      = {979-8-89176-309-8},
  abstract  = {Hindi has many dialects and they are vital to India{'}s cultural and linguistics heritage. However, many of them have been largely overlooked in modern language technological advancements, primarily due to lack proper resources. In this study, we explore speech-to-speech machine translation (S2ST) for four Hindi dialects, i.e., \textit{Awadhi}, \textit{Bhojpuri}, \textit{Braj Bhasha}, and \textit{Magahi}. We adopt a cascaded S2ST pipeline comprising of three stages: Automatic Speech Recognition (ASR), Machine Translation (MT), and Text-to-Speech (TTS). We evaluate many recent large language models (LLMs) for dialect-to-Hindi and dialect-to-English translations in zero-shot, few-shot, and chain-of-thought setups. Our comparative analysis offers insights into the current capabilities and limitations of LLM-based approaches for low-resource dialectal S2ST in Indian context.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record: one paper (authors, abstract, page extent) hosted in its proceedings volume. -->
  <mods ID="sood-etal-2025-speech">
    <titleInfo>
      <title>Speech-to-Speech Machine Translation for Dialectal Variations of Hindi</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Sanmay</namePart>
      <namePart type="family">Sood</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siddharth</namePart>
      <namePart type="family">Rajput</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Md</namePart>
      <namePart type="given">Shad</namePart>
      <namePart type="family">Akhtar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Toshiaki</namePart>
        <namePart type="family">Nakazawa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Isao</namePart>
        <namePart type="family">Goto</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mumbai, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-309-8</identifier>
    </relatedItem>
    <abstract>Hindi has many dialects and they are vital to India’s cultural and linguistics heritage. However, many of them have been largely overlooked in modern language technological advancements, primarily due to lack proper resources. In this study, we explore speech-to-speech machine translation (S2ST) for four Hindi dialects, i.e., Awadhi, Bhojpuri, Braj Bhasha, and Magahi. We adopt a cascaded S2ST pipeline comprising of three stages: Automatic Speech Recognition (ASR), Machine Translation (MT), and Text-to-Speech (TTS). We evaluate many recent large language models (LLMs) for dialect-to-Hindi and dialect-to-English translations in zero-shot, few-shot, and chain-of-thought setups. Our comparative analysis offers insights into the current capabilities and limitations of LLM-based approaches for low-resource dialectal S2ST in Indian context.</abstract>
    <identifier type="citekey">sood-etal-2025-speech</identifier>
    <location>
      <url>https://aclanthology.org/2025.wat-1.5/</url>
    </location>
    <part>
      <date>2025-12</date>
      <extent unit="page">
        <start>54</start>
        <end>65</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Speech-to-Speech Machine Translation for Dialectal Variations of Hindi
%A Sood, Sanmay
%A Rajput, Siddharth
%A Akhtar, Md Shad
%Y Nakazawa, Toshiaki
%Y Goto, Isao
%S Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-309-8
%F sood-etal-2025-speech
%X Hindi has many dialects and they are vital to India’s cultural and linguistics heritage. However, many of them have been largely overlooked in modern language technological advancements, primarily due to lack proper resources. In this study, we explore speech-to-speech machine translation (S2ST) for four Hindi dialects, i.e., Awadhi, Bhojpuri, Braj Bhasha, and Magahi. We adopt a cascaded S2ST pipeline comprising of three stages: Automatic Speech Recognition (ASR), Machine Translation (MT), and Text-to-Speech (TTS). We evaluate many recent large language models (LLMs) for dialect-to-Hindi and dialect-to-English translations in zero-shot, few-shot, and chain-of-thought setups. Our comparative analysis offers insights into the current capabilities and limitations of LLM-based approaches for low-resource dialectal S2ST in Indian context.
%U https://aclanthology.org/2025.wat-1.5/
%P 54-65
Markdown (Informal)
[Speech-to-Speech Machine Translation for Dialectal Variations of Hindi](https://aclanthology.org/2025.wat-1.5/) (Sood et al., WAT 2025)
ACL