@inproceedings{platnick-etal-2024-preset,
title = "Preset-Voice Matching for Privacy Regulated Speech-to-Speech Translation Systems",
author = "Platnick, Daniel and
Abdelnour, Bishoy and
Earl, Eamon and
Kumar, Rahul and
Rezaei, Zahra and
Tsangaris, Thomas and
Lagum, Faraj",
editor = "Habernal, Ivan and
Ghanavati, Sepideh and
Ravichander, Abhilasha and
Jain, Vijayanta and
Thaine, Patricia and
Igamberdiev, Timour and
Mireshghallah, Niloofar and
Feyisetan, Oluwaseyi",
booktitle = "Proceedings of the Fifth Workshop on Privacy in Natural Language Processing",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.privatenlp-1.6",
pages = "52--62",
abstract = "In recent years, there has been increased demand for speech-to-speech translation (S2ST) systems in industry settings. Although successfully commercialized, cloning-based S2ST systems expose their distributors to liabilities when misused by individuals and can infringe on personality rights when exploited by media organizations. This work proposes a regulated S2ST framework called Preset-Voice Matching (PVM). PVM removes cross-lingual voice cloning in S2ST by first matching the input voice to a similar prior consenting speaker voice in the target-language. With this separation, PVM avoids cloning the input speaker, ensuring PVM systems comply with regulations and reduce risk of misuse. Our results demonstrate PVM can significantly improve S2ST system run-time in multi-speaker settings and the naturalness of S2ST synthesized speech. To our knowledge, PVM is the first explicitly regulated S2ST framework leveraging similarly-matched preset-voices for dynamic S2ST tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="platnick-etal-2024-preset">
<titleInfo>
<title>Preset-Voice Matching for Privacy Regulated Speech-to-Speech Translation Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Platnick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bishoy</namePart>
<namePart type="family">Abdelnour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eamon</namePart>
<namePart type="family">Earl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zahra</namePart>
<namePart type="family">Rezaei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Tsangaris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faraj</namePart>
<namePart type="family">Lagum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Privacy in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sepideh</namePart>
<namePart type="family">Ghanavati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhilasha</namePart>
<namePart type="family">Ravichander</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vijayanta</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Thaine</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timour</namePart>
<namePart type="family">Igamberdiev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niloofar</namePart>
<namePart type="family">Mireshghallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oluwaseyi</namePart>
<namePart type="family">Feyisetan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, there has been increased demand for speech-to-speech translation (S2ST) systems in industry settings. Although successfully commercialized, cloning-based S2ST systems expose their distributors to liabilities when misused by individuals and can infringe on personality rights when exploited by media organizations. This work proposes a regulated S2ST framework called Preset-Voice Matching (PVM). PVM removes cross-lingual voice cloning in S2ST by first matching the input voice to a similar prior consenting speaker voice in the target-language. With this separation, PVM avoids cloning the input speaker, ensuring PVM systems comply with regulations and reduce risk of misuse. Our results demonstrate PVM can significantly improve S2ST system run-time in multi-speaker settings and the naturalness of S2ST synthesized speech. To our knowledge, PVM is the first explicitly regulated S2ST framework leveraging similarly-matched preset-voices for dynamic S2ST tasks.</abstract>
<identifier type="citekey">platnick-etal-2024-preset</identifier>
<location>
<url>https://aclanthology.org/2024.privatenlp-1.6</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>52</start>
<end>62</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Preset-Voice Matching for Privacy Regulated Speech-to-Speech Translation Systems
%A Platnick, Daniel
%A Abdelnour, Bishoy
%A Earl, Eamon
%A Kumar, Rahul
%A Rezaei, Zahra
%A Tsangaris, Thomas
%A Lagum, Faraj
%Y Habernal, Ivan
%Y Ghanavati, Sepideh
%Y Ravichander, Abhilasha
%Y Jain, Vijayanta
%Y Thaine, Patricia
%Y Igamberdiev, Timour
%Y Mireshghallah, Niloofar
%Y Feyisetan, Oluwaseyi
%S Proceedings of the Fifth Workshop on Privacy in Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F platnick-etal-2024-preset
%X In recent years, there has been increased demand for speech-to-speech translation (S2ST) systems in industry settings. Although successfully commercialized, cloning-based S2ST systems expose their distributors to liabilities when misused by individuals and can infringe on personality rights when exploited by media organizations. This work proposes a regulated S2ST framework called Preset-Voice Matching (PVM). PVM removes cross-lingual voice cloning in S2ST by first matching the input voice to a similar prior consenting speaker voice in the target-language. With this separation, PVM avoids cloning the input speaker, ensuring PVM systems comply with regulations and reduce risk of misuse. Our results demonstrate PVM can significantly improve S2ST system run-time in multi-speaker settings and the naturalness of S2ST synthesized speech. To our knowledge, PVM is the first explicitly regulated S2ST framework leveraging similarly-matched preset-voices for dynamic S2ST tasks.
%U https://aclanthology.org/2024.privatenlp-1.6
%P 52-62
Markdown (Informal)
[Preset-Voice Matching for Privacy Regulated Speech-to-Speech Translation Systems](https://aclanthology.org/2024.privatenlp-1.6) (Platnick et al., PrivateNLP-WS 2024)
ACL