% Conference paper (SIGDIAL 2024); the landing page and DOI are given in the url/doi fields.
@inproceedings{szekely-etal-2024-voice,
  author    = {Szekely, Eva and Higginbotham, Jeff and Possemato, Francesco},
  title     = {Voice and Choice: Investigating the Role of Prosodic Variation in Request Compliance and Perceived Politeness Using Conversational {TTS}},
  editor    = {Kawahara, Tatsuya and Demberg, Vera and Ultes, Stefan and Inoue, Koji and Mehri, Shikib and Howcroft, David and Komatani, Kazunori},
  booktitle = {Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month     = sep,
  year      = {2024},
  address   = {Kyoto, Japan},
  publisher = {Association for Computational Linguistics},
  pages     = {466--476},
  doi       = {10.18653/v1/2024.sigdial-1.40},
  url       = {https://aclanthology.org/2024.sigdial-1.40},
  abstract  = {As conversational Text-to-Speech (TTS) technologies become increasingly realistic and expressive, understanding the impact of prosodic variation on speech perception and social dynamics is crucial for enhancing conversational systems. This study explores the influence of prosodic features on listener responses to indirect requests using a specifically designed conversational TTS engine capable of controlling prosody, and generating speech across three different speaker profiles: female, male, and gender-ambiguous. We conducted two experiments to analyse how naturalistic variations in speech rate and vocal energy (projection) impact the likelihood of request compliance and perceived politeness. In the first experiment, we examined how prosodic modifications affect the perception of politeness in permission- and service requests. In the second experiment participants compared pairs of spoken requests, each rendered with different prosodic features, and chose which they were more likely to grant. Results indicate that both faster speech rates and higher projection increased the willingness to comply, though the extent of this influence varied by speaker gender. Higher projection in service request increases the chance of being granted more than in permission requests. Politeness has a demonstrated positive impact on the likelihood of requests being granted, this effect is stronger for the male voice compared to female and gender-ambiguous voices.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey szekely-etal-2024-voice. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="szekely-etal-2024-voice">
    <titleInfo>
      <title>Voice and Choice: Investigating the Role of Prosodic Variation in Request Compliance and Perceived Politeness Using Conversational TTS</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Eva</namePart>
      <namePart type="family">Szekely</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jeff</namePart>
      <namePart type="family">Higginbotham</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francesco</namePart>
      <namePart type="family">Possemato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tatsuya</namePart>
        <namePart type="family">Kawahara</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefan</namePart>
        <namePart type="family">Ultes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Koji</namePart>
        <namePart type="family">Inoue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shikib</namePart>
        <namePart type="family">Mehri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Howcroft</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kazunori</namePart>
        <namePart type="family">Komatani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Kyoto, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>As conversational Text-to-Speech (TTS) technologies become increasingly realistic and expressive, understanding the impact of prosodic variation on speech perception and social dynamics is crucial for enhancing conversational systems. This study explores the influence of prosodic features on listener responses to indirect requests using a specifically designed conversational TTS engine capable of controlling prosody, and generating speech across three different speaker profiles: female, male, and gender-ambiguous. We conducted two experiments to analyse how naturalistic variations in speech rate and vocal energy (projection) impact the likelihood of request compliance and perceived politeness. In the first experiment, we examined how prosodic modifications affect the perception of politeness in permission- and service requests. In the second experiment participants compared pairs of spoken requests, each rendered with different prosodic features, and chose which they were more likely to grant. Results indicate that both faster speech rates and higher projection increased the willingness to comply, though the extent of this influence varied by speaker gender. Higher projection in service request increases the chance of being granted more than in permission requests. Politeness has a demonstrated positive impact on the likelihood of requests being granted, this effect is stronger for the male voice compared to female and gender-ambiguous voices.</abstract>
    <!-- Identifiers and landing page for the paper. -->
    <identifier type="citekey">szekely-etal-2024-voice</identifier>
    <identifier type="doi">10.18653/v1/2024.sigdial-1.40</identifier>
    <location>
      <url>https://aclanthology.org/2024.sigdial-1.40</url>
    </location>
    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2024-09</date>
      <extent unit="page">
        <start>466</start>
        <end>476</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Voice and Choice: Investigating the Role of Prosodic Variation in Request Compliance and Perceived Politeness Using Conversational TTS
%A Szekely, Eva
%A Higginbotham, Jeff
%A Possemato, Francesco
%Y Kawahara, Tatsuya
%Y Demberg, Vera
%Y Ultes, Stefan
%Y Inoue, Koji
%Y Mehri, Shikib
%Y Howcroft, David
%Y Komatani, Kazunori
%S Proceedings of the 25th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2024
%8 September
%I Association for Computational Linguistics
%C Kyoto, Japan
%F szekely-etal-2024-voice
%X As conversational Text-to-Speech (TTS) technologies become increasingly realistic and expressive, understanding the impact of prosodic variation on speech perception and social dynamics is crucial for enhancing conversational systems. This study explores the influence of prosodic features on listener responses to indirect requests using a specifically designed conversational TTS engine capable of controlling prosody, and generating speech across three different speaker profiles: female, male, and gender-ambiguous. We conducted two experiments to analyse how naturalistic variations in speech rate and vocal energy (projection) impact the likelihood of request compliance and perceived politeness. In the first experiment, we examined how prosodic modifications affect the perception of politeness in permission- and service requests. In the second experiment participants compared pairs of spoken requests, each rendered with different prosodic features, and chose which they were more likely to grant. Results indicate that both faster speech rates and higher projection increased the willingness to comply, though the extent of this influence varied by speaker gender. Higher projection in service request increases the chance of being granted more than in permission requests. Politeness has a demonstrated positive impact on the likelihood of requests being granted, this effect is stronger for the male voice compared to female and gender-ambiguous voices.
%R 10.18653/v1/2024.sigdial-1.40
%U https://aclanthology.org/2024.sigdial-1.40
%U https://doi.org/10.18653/v1/2024.sigdial-1.40
%P 466-476
Markdown (Informal)
[Voice and Choice: Investigating the Role of Prosodic Variation in Request Compliance and Perceived Politeness Using Conversational TTS](https://aclanthology.org/2024.sigdial-1.40) (Szekely et al., SIGDIAL 2024)
ACL