@inproceedings{mujadia-sharma-2023-towards,
title = "Towards Speech to Speech Machine Translation focusing on {I}ndian Languages",
author = "Mujadia, Vandan and
Sharma, Dipti",
editor = "Croce, Danilo and
Soldaini, Luca",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-demo.19",
doi = "10.18653/v1/2023.eacl-demo.19",
pages = "161--168",
abstract = "We introduce an SSMT (Speech to Speech Machine Translation, aka Speech to Speech Video Translation) Pipeline(\url{https://ssmt.iiit.ac.in/ssmtiiith}), as web application for translating videos from one language to another by cascading multiple language modules. Our speech translation system combines highly accurate speech to text (ASR) for Indian English, pre-possessing modules to bridge ASR-MT gaps such as spoken disfluency and punctuation, robust machine translation (MT) systems for multiple language pairs, SRT module for translated text, text to speech (TTS) module and a module to render translated synthesized audio on the original video. It is user-friendly, flexible, and easily accessible system. We aim to provide a complete configurable speech translation experience to users and researchers with this system. It also supports human intervention where users can edit outputs of different modules and the edited output can then be used for subsequent processing to improve overall output quality. By adopting a human-in-the-loop approach, the aim is to configure technology in such a way where it can assist humans and help to reduce the involved human efforts in speech translation involving English and Indian languages. As per our understanding, this is the first fully integrated system for English to Indian languages (Hindi, Telugu, Gujarati, Marathi and Punjabi) video translation. Our evaluation shows that one can get 3.5+ MOS score using the developed pipeline with human intervention for English to Hindi. A short video demonstrating our system is available at \url{https://youtu.be/MVftzoeRg48}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mujadia-sharma-2023-towards">
<titleInfo>
<title>Towards Speech to Speech Machine Translation focusing on Indian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vandan</namePart>
<namePart type="family">Mujadia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Soldaini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce an SSMT (Speech to Speech Machine Translation, aka Speech to Speech Video Translation) Pipeline(https://ssmt.iiit.ac.in/ssmtiiith), as web application for translating videos from one language to another by cascading multiple language modules. Our speech translation system combines highly accurate speech to text (ASR) for Indian English, pre-possessing modules to bridge ASR-MT gaps such as spoken disfluency and punctuation, robust machine translation (MT) systems for multiple language pairs, SRT module for translated text, text to speech (TTS) module and a module to render translated synthesized audio on the original video. It is user-friendly, flexible, and easily accessible system. We aim to provide a complete configurable speech translation experience to users and researchers with this system. It also supports human intervention where users can edit outputs of different modules and the edited output can then be used for subsequent processing to improve overall output quality. By adopting a human-in-the-loop approach, the aim is to configure technology in such a way where it can assist humans and help to reduce the involved human efforts in speech translation involving English and Indian languages. As per our understanding, this is the first fully integrated system for English to Indian languages (Hindi, Telugu, Gujarati, Marathi and Punjabi) video translation. Our evaluation shows that one can get 3.5+ MOS score using the developed pipeline with human intervention for English to Hindi. A short video demonstrating our system is available at https://youtu.be/MVftzoeRg48.</abstract>
<identifier type="citekey">mujadia-sharma-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-demo.19</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-demo.19</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>161</start>
<end>168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Speech to Speech Machine Translation focusing on Indian Languages
%A Mujadia, Vandan
%A Sharma, Dipti
%Y Croce, Danilo
%Y Soldaini, Luca
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics: System Demonstrations
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F mujadia-sharma-2023-towards
%X We introduce an SSMT (Speech to Speech Machine Translation, aka Speech to Speech Video Translation) Pipeline(https://ssmt.iiit.ac.in/ssmtiiith), as web application for translating videos from one language to another by cascading multiple language modules. Our speech translation system combines highly accurate speech to text (ASR) for Indian English, pre-possessing modules to bridge ASR-MT gaps such as spoken disfluency and punctuation, robust machine translation (MT) systems for multiple language pairs, SRT module for translated text, text to speech (TTS) module and a module to render translated synthesized audio on the original video. It is user-friendly, flexible, and easily accessible system. We aim to provide a complete configurable speech translation experience to users and researchers with this system. It also supports human intervention where users can edit outputs of different modules and the edited output can then be used for subsequent processing to improve overall output quality. By adopting a human-in-the-loop approach, the aim is to configure technology in such a way where it can assist humans and help to reduce the involved human efforts in speech translation involving English and Indian languages. As per our understanding, this is the first fully integrated system for English to Indian languages (Hindi, Telugu, Gujarati, Marathi and Punjabi) video translation. Our evaluation shows that one can get 3.5+ MOS score using the developed pipeline with human intervention for English to Hindi. A short video demonstrating our system is available at https://youtu.be/MVftzoeRg48.
%R 10.18653/v1/2023.eacl-demo.19
%U https://aclanthology.org/2023.eacl-demo.19
%U https://doi.org/10.18653/v1/2023.eacl-demo.19
%P 161-168
Markdown (Informal)
[Towards Speech to Speech Machine Translation focusing on Indian Languages](https://aclanthology.org/2023.eacl-demo.19) (Mujadia & Sharma, EACL 2023)
ACL