@inproceedings{aicher-etal-2022-towards-speech,
title = "Towards Speech-only Opinion-level Sentiment Analysis",
author = "Aicher, Annalena and
Gazizullina, Alisa and
Gusev, Aleksei and
Matveev, Yuri and
Minker, Wolfgang",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.215",
pages = "2000--2006",
abstract = "The growing popularity of various forms of Spoken Dialogue Systems (SDS) raises the demand for their capability of implicitly assessing the speaker{'}s sentiment from speech only. Mapping the latter on user preferences enables to adapt to the user and individualize the requested information while increasing user satisfaction. In this paper, we explore the integration of rank consistent ordinal regression into a speech-only sentiment prediction task performed by ResNet-like systems. Furthermore, we use speaker verification extractors trained on larger datasets as low-level feature extractors. An improvement of performance is shown by fusing sentiment and pre-extracted speaker embeddings reducing the speaker bias of sentiment predictions. Numerous experiments on Multimodal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) databases show that we beat the baselines of state-of-the-art unimodal approaches. Using speech as the only modality combined with optimizing an order-sensitive objective function gets significantly closer to the sentiment analysis results of state-of-the-art multimodal systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aicher-etal-2022-towards-speech">
<titleInfo>
<title>Towards Speech-only Opinion-level Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annalena</namePart>
<namePart type="family">Aicher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alisa</namePart>
<namePart type="family">Gazizullina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleksei</namePart>
<namePart type="family">Gusev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Matveev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Minker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The growing popularity of various forms of Spoken Dialogue Systems (SDS) raises the demand for their capability of implicitly assessing the speaker’s sentiment from speech only. Mapping the latter on user preferences enables to adapt to the user and individualize the requested information while increasing user satisfaction. In this paper, we explore the integration of rank consistent ordinal regression into a speech-only sentiment prediction task performed by ResNet-like systems. Furthermore, we use speaker verification extractors trained on larger datasets as low-level feature extractors. An improvement of performance is shown by fusing sentiment and pre-extracted speaker embeddings reducing the speaker bias of sentiment predictions. Numerous experiments on Multimodal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) databases show that we beat the baselines of state-of-the-art unimodal approaches. Using speech as the only modality combined with optimizing an order-sensitive objective function gets significantly closer to the sentiment analysis results of state-of-the-art multimodal systems.</abstract>
<identifier type="citekey">aicher-etal-2022-towards-speech</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.215</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>2000</start>
<end>2006</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Speech-only Opinion-level Sentiment Analysis
%A Aicher, Annalena
%A Gazizullina, Alisa
%A Gusev, Aleksei
%A Matveev, Yuri
%A Minker, Wolfgang
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F aicher-etal-2022-towards-speech
%X The growing popularity of various forms of Spoken Dialogue Systems (SDS) raises the demand for their capability of implicitly assessing the speaker’s sentiment from speech only. Mapping the latter on user preferences enables to adapt to the user and individualize the requested information while increasing user satisfaction. In this paper, we explore the integration of rank consistent ordinal regression into a speech-only sentiment prediction task performed by ResNet-like systems. Furthermore, we use speaker verification extractors trained on larger datasets as low-level feature extractors. An improvement of performance is shown by fusing sentiment and pre-extracted speaker embeddings reducing the speaker bias of sentiment predictions. Numerous experiments on Multimodal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) databases show that we beat the baselines of state-of-the-art unimodal approaches. Using speech as the only modality combined with optimizing an order-sensitive objective function gets significantly closer to the sentiment analysis results of state-of-the-art multimodal systems.
%U https://aclanthology.org/2022.lrec-1.215
%P 2000-2006
Markdown (Informal)
[Towards Speech-only Opinion-level Sentiment Analysis](https://aclanthology.org/2022.lrec-1.215) (Aicher et al., LREC 2022)
ACL
- Annalena Aicher, Alisa Gazizullina, Aleksei Gusev, Yuri Matveev, and Wolfgang Minker. 2022. Towards Speech-only Opinion-level Sentiment Analysis. In Proceedings of the Thirteenth Language Resources and Evaluation Conference, pages 2000–2006, Marseille, France. European Language Resources Association.