@inproceedings{gajewska-etal-2025-voice,
  title     = {Voice synthesis in {Polish} and {English} - analyzing prediction differences in speaker verification systems},
  author    = {Gajewska, Joanna and
               Martinek, Alicja and
               O{\l}owski, Micha{\l} J. and
               Bartuzi-Trokielewicz, Ewelina},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Eugenio, Barbara Di and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.643/},
  pages     = {9618--9629},
  abstract  = {Deep learning has significantly enhanced voice synthesis, yielding realistic audio capable of mimicking individual voices. This progress, however, raises security concerns due to the potential misuse of audio deepfakes. Our research examines the effects of deepfakes on speaker recognition systems across English and Polish corpora, assessing both Text-to-Speech and Voice Conversion methods. We focus on the biometric similarity's role in the effectiveness of impersonations and find that synthetic voices can maintain personal traits, posing risks of unauthorized access. The study's key contributions include analyzing voice synthesis across languages, evaluating biometric resemblance in voice conversion, and contrasting Text-to-Speech and Voice Conversion paradigms. These insights emphasize the need for improved biometric security against audio deepfake threats.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gajewska-etal-2025-voice">
<titleInfo>
<title>Voice synthesis in Polish and English - analyzing prediction differences in speaker verification systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joanna</namePart>
<namePart type="family">Gajewska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicja</namePart>
<namePart type="family">Martinek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michał</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Ołowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewelina</namePart>
<namePart type="family">Bartuzi-Trokielewicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deep learning has significantly enhanced voice synthesis, yielding realistic audio capable of mimicking individual voices. This progress, however, raises security concerns due to the potential misuse of audio deepfakes. Our research examines the effects of deepfakes on speaker recognition systems across English and Polish corpora, assessing both Text-to-Speech and Voice Conversion methods. We focus on the biometric similarity's role in the effectiveness of impersonations and find that synthetic voices can maintain personal traits, posing risks of unauthorized access. The study's key contributions include analyzing voice synthesis across languages, evaluating biometric resemblance in voice conversion, and contrasting Text-to-Speech and Voice Conversion paradigms. These insights emphasize the need for improved biometric security against audio deepfake threats.</abstract>
<identifier type="citekey">gajewska-etal-2025-voice</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.643/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>9618</start>
<end>9629</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Voice synthesis in Polish and English - analyzing prediction differences in speaker verification systems
%A Gajewska, Joanna
%A Martinek, Alicja
%A Ołowski, Michał J.
%A Bartuzi-Trokielewicz, Ewelina
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F gajewska-etal-2025-voice
%X Deep learning has significantly enhanced voice synthesis, yielding realistic audio capable of mimicking individual voices. This progress, however, raises security concerns due to the potential misuse of audio deepfakes. Our research examines the effects of deepfakes on speaker recognition systems across English and Polish corpora, assessing both Text-to-Speech and Voice Conversion methods. We focus on the biometric similarity's role in the effectiveness of impersonations and find that synthetic voices can maintain personal traits, posing risks of unauthorized access. The study's key contributions include analyzing voice synthesis across languages, evaluating biometric resemblance in voice conversion, and contrasting Text-to-Speech and Voice Conversion paradigms. These insights emphasize the need for improved biometric security against audio deepfake threats.
%U https://aclanthology.org/2025.coling-main.643/
%P 9618-9629
Markdown (Informal)
[Voice synthesis in Polish and English - analyzing prediction differences in speaker verification systems](https://aclanthology.org/2025.coling-main.643/) (Gajewska et al., COLING 2025)
ACL