% ACL Anthology BibTeX record for a 2023 SIGDIAL proceedings paper.
@inproceedings{gao-etal-2023-synthesising,
  title     = {Synthesising Personality with Neural Speech Synthesis},
  author    = {Gao, Shilin and Aylett, Matthew P. and Braude, David A. and Lai, Catherine},
  editor    = {Stoyanchev, Svetlana and Joty, Shafiq and Schlangen, David and Dusek, Ondrej and Kennington, Casey and Alikhani, Malihe},
  booktitle = {Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue},
  month     = sep,
  year      = {2023},
  address   = {Prague, Czechia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.sigdial-1.36/},
  doi       = {10.18653/v1/2023.sigdial-1.36},
  pages     = {393--399},
  abstract  = {Matching the personality of conversational agent to the personality of the user can significantly improve the user experience, with many successful examples in text-based chatbots. It is also important for a voice-based system to be able to alter the personality of the speech as perceived by the users. In this pilot study, fifteen voices were rated using Big Five personality traits. Five content-neutral sentences were chosen for the listening tests. The audio data, together with two rated traits (Extroversion and Agreeableness), were used to train a neural speech synthesiser based on one male and one female voices. The effect of altering the personality trait features was evaluated by a second listening test. Both perceived extroversion and agreeableness in the synthetic voices were affected significantly. The controllable range was limited due to a lack of variance in the source audio data. The perceived personality traits correlated with each other and with the naturalness of the speech. Future work can be making a chatbot speak in a voice with a pre-defined or adaptive personality by using personality synthesis in speech together with text-based personality generation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey gao-etal-2023-synthesising. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gao-etal-2023-synthesising">
    <titleInfo>
      <title>Synthesising Personality with Neural Speech Synthesis</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Shilin</namePart>
      <namePart type="family">Gao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matthew</namePart>
      <namePart type="given">P</namePart>
      <namePart type="family">Aylett</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Braude</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Catherine</namePart>
      <namePart type="family">Lai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, and its publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Svetlana</namePart>
        <namePart type="family">Stoyanchev</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shafiq</namePart>
        <namePart type="family">Joty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Schlangen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ondrej</namePart>
        <namePart type="family">Dusek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Casey</namePart>
        <namePart type="family">Kennington</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Malihe</namePart>
        <namePart type="family">Alikhani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Prague, Czechia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Matching the personality of conversational agent to the personality of the user can significantly improve the user experience, with many successful examples in text-based chatbots. It is also important for a voice-based system to be able to alter the personality of the speech as perceived by the users. In this pilot study, fifteen voices were rated using Big Five personality traits. Five content-neutral sentences were chosen for the listening tests. The audio data, together with two rated traits (Extroversion and Agreeableness), were used to train a neural speech synthesiser based on one male and one female voices. The effect of altering the personality trait features was evaluated by a second listening test. Both perceived extroversion and agreeableness in the synthetic voices were affected significantly. The controllable range was limited due to a lack of variance in the source audio data. The perceived personality traits correlated with each other and with the naturalness of the speech. Future work can be making a chatbot speak in a voice with a pre-defined or adaptive personality by using personality synthesis in speech together with text-based personality generation.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">gao-etal-2023-synthesising</identifier>
    <identifier type="doi">10.18653/v1/2023.sigdial-1.36</identifier>
    <location>
      <url>https://aclanthology.org/2023.sigdial-1.36/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>393</start>
        <end>399</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Synthesising Personality with Neural Speech Synthesis
%A Gao, Shilin
%A Aylett, Matthew P.
%A Braude, David A.
%A Lai, Catherine
%Y Stoyanchev, Svetlana
%Y Joty, Shafiq
%Y Schlangen, David
%Y Dusek, Ondrej
%Y Kennington, Casey
%Y Alikhani, Malihe
%S Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2023
%8 September
%I Association for Computational Linguistics
%C Prague, Czechia
%F gao-etal-2023-synthesising
%X Matching the personality of conversational agent to the personality of the user can significantly improve the user experience, with many successful examples in text-based chatbots. It is also important for a voice-based system to be able to alter the personality of the speech as perceived by the users. In this pilot study, fifteen voices were rated using Big Five personality traits. Five content-neutral sentences were chosen for the listening tests. The audio data, together with two rated traits (Extroversion and Agreeableness), were used to train a neural speech synthesiser based on one male and one female voices. The effect of altering the personality trait features was evaluated by a second listening test. Both perceived extroversion and agreeableness in the synthetic voices were affected significantly. The controllable range was limited due to a lack of variance in the source audio data. The perceived personality traits correlated with each other and with the naturalness of the speech. Future work can be making a chatbot speak in a voice with a pre-defined or adaptive personality by using personality synthesis in speech together with text-based personality generation.
%R 10.18653/v1/2023.sigdial-1.36
%U https://aclanthology.org/2023.sigdial-1.36/
%U https://doi.org/10.18653/v1/2023.sigdial-1.36
%P 393-399
Markdown (Informal)
[Synthesising Personality with Neural Speech Synthesis](https://aclanthology.org/2023.sigdial-1.36/) (Gao et al., SIGDIAL 2023)
ACL
- Shilin Gao, Matthew P. Aylett, David A. Braude, and Catherine Lai. 2023. Synthesising Personality with Neural Speech Synthesis. In Proceedings of the 24th Annual Meeting of the Special Interest Group on Discourse and Dialogue, pages 393–399, Prague, Czechia. Association for Computational Linguistics.