% NAACL 2018 long paper; ACL Anthology record N18-1193.
@inproceedings{hazarika-etal-2018-conversational,
  title     = {Conversational Memory Network for Emotion Recognition in Dyadic Dialogue Videos},
  author    = {Hazarika, Devamanyu and
               Poria, Soujanya and
               Zadeh, Amir and
               Cambria, Erik and
               Morency, Louis-Philippe and
               Zimmermann, Roger},
  editor    = {Walker, Marilyn and
               Ji, Heng and
               Stent, Amanda},
  booktitle = {Proceedings of the 2018 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N18-1193/},
  doi       = {10.18653/v1/N18-1193},
  pages     = {2122--2132},
  abstract  = {Emotion recognition in conversations is crucial for the development of empathetic machines. Present methods mostly ignore the role of inter-speaker dependency relations while classifying emotions in conversations. In this paper, we address recognizing utterance-level emotions in dyadic conversational videos. We propose a deep neural framework, termed Conversational Memory Network (CMN), which leverages contextual information from the conversation history. In particular, CMN uses multimodal approach comprising audio, visual and textual features with gated recurrent units to model past utterances of each speaker into memories. These memories are then merged using attention-based hops to capture inter-speaker dependencies. Experiments show a significant improvement of 3--4{\%} in accuracy over the state of the art.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hazarika-etal-2018-conversational">
<titleInfo>
<title>Conversational Memory Network for Emotion Recognition in Dyadic Dialogue Videos</title>
</titleInfo>
<name type="personal">
<namePart type="given">Devamanyu</namePart>
<namePart type="family">Hazarika</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soujanya</namePart>
<namePart type="family">Poria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erik</namePart>
<namePart type="family">Cambria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louis-Philippe</namePart>
<namePart type="family">Morency</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roger</namePart>
<namePart type="family">Zimmermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marilyn</namePart>
<namePart type="family">Walker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Emotion recognition in conversations is crucial for the development of empathetic machines. Present methods mostly ignore the role of inter-speaker dependency relations while classifying emotions in conversations. In this paper, we address recognizing utterance-level emotions in dyadic conversational videos. We propose a deep neural framework, termed Conversational Memory Network (CMN), which leverages contextual information from the conversation history. In particular, CMN uses multimodal approach comprising audio, visual and textual features with gated recurrent units to model past utterances of each speaker into memories. These memories are then merged using attention-based hops to capture inter-speaker dependencies. Experiments show a significant improvement of 3–4% in accuracy over the state of the art.</abstract>
<identifier type="citekey">hazarika-etal-2018-conversational</identifier>
<identifier type="doi">10.18653/v1/N18-1193</identifier>
<location>
<url>https://aclanthology.org/N18-1193/</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>2122</start>
<end>2132</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Conversational Memory Network for Emotion Recognition in Dyadic Dialogue Videos
%A Hazarika, Devamanyu
%A Poria, Soujanya
%A Zadeh, Amir
%A Cambria, Erik
%A Morency, Louis-Philippe
%A Zimmermann, Roger
%Y Walker, Marilyn
%Y Ji, Heng
%Y Stent, Amanda
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F hazarika-etal-2018-conversational
%X Emotion recognition in conversations is crucial for the development of empathetic machines. Present methods mostly ignore the role of inter-speaker dependency relations while classifying emotions in conversations. In this paper, we address recognizing utterance-level emotions in dyadic conversational videos. We propose a deep neural framework, termed Conversational Memory Network (CMN), which leverages contextual information from the conversation history. In particular, CMN uses multimodal approach comprising audio, visual and textual features with gated recurrent units to model past utterances of each speaker into memories. These memories are then merged using attention-based hops to capture inter-speaker dependencies. Experiments show a significant improvement of 3-4% in accuracy over the state of the art.
%R 10.18653/v1/N18-1193
%U https://aclanthology.org/N18-1193/
%U https://doi.org/10.18653/v1/N18-1193
%P 2122-2132
Markdown (Informal)
[Conversational Memory Network for Emotion Recognition in Dyadic Dialogue Videos](https://aclanthology.org/N18-1193/) (Hazarika et al., NAACL 2018)
ACL
- Devamanyu Hazarika, Soujanya Poria, Amir Zadeh, Erik Cambria, Louis-Philippe Morency, and Roger Zimmermann. 2018. Conversational Memory Network for Emotion Recognition in Dyadic Dialogue Videos. In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pages 2122–2132, New Orleans, Louisiana. Association for Computational Linguistics.