@inproceedings{firdaus-etal-2020-multidm,
title = "{M}ulti{DM}-{GCN}: Aspect-guided Response Generation in Multi-domain Multi-modal Dialogue System using Graph Convolutional Network",
author = "Firdaus, Mauajama and
Thakur, Nidhi and
Ekbal, Asif",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.210",
doi = "10.18653/v1/2020.findings-emnlp.210",
pages = "2318--2328",
    abstract = "In the recent past, dialogue systems have gained immense popularity and become ubiquitous. During conversations, humans not only rely on language but also seek contextual information from visual content. In every task-oriented dialogue system, the user is guided by the different aspects of a product or service, which steer the conversation towards selecting that product or service. In this work, we present a multi-modal conversational framework for a task-oriented dialogue setup that generates responses following the different aspects of a product or service to cater to the user{'}s needs. We show that responses guided by aspect information are more interactive and informative, enabling better communication between the agent and the user. We first create a Multi-domain Multi-modal Dialogue (MDMMD) dataset of conversations involving both text and images across three domains: restaurants, electronics, and furniture. We then implement a Graph Convolutional Network (GCN) based framework that generates appropriate textual responses from the multi-modal inputs. The multi-modal information, comprising both textual and image representations, is fed to the decoder along with the aspect information to generate aspect-guided responses. Quantitative and qualitative analyses show that the proposed methodology outperforms several baselines on the proposed task of aspect-guided response generation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="firdaus-etal-2020-multidm">
    <titleInfo>
      <title>MultiDM-GCN: Aspect-guided Response Generation in Multi-domain Multi-modal Dialogue System using Graph Convolutional Network</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mauajama</namePart>
      <namePart type="family">Firdaus</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nidhi</namePart>
      <namePart type="family">Thakur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Asif</namePart>
      <namePart type="family">Ekbal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Trevor</namePart>
        <namePart type="family">Cohn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In the recent past, dialogue systems have gained immense popularity and become ubiquitous. During conversations, humans not only rely on language but also seek contextual information from visual content. In every task-oriented dialogue system, the user is guided by the different aspects of a product or service, which steer the conversation towards selecting that product or service. In this work, we present a multi-modal conversational framework for a task-oriented dialogue setup that generates responses following the different aspects of a product or service to cater to the user’s needs. We show that responses guided by aspect information are more interactive and informative, enabling better communication between the agent and the user. We first create a Multi-domain Multi-modal Dialogue (MDMMD) dataset of conversations involving both text and images across three domains: restaurants, electronics, and furniture. We then implement a Graph Convolutional Network (GCN) based framework that generates appropriate textual responses from the multi-modal inputs. The multi-modal information, comprising both textual and image representations, is fed to the decoder along with the aspect information to generate aspect-guided responses. Quantitative and qualitative analyses show that the proposed methodology outperforms several baselines on the proposed task of aspect-guided response generation.</abstract>
<identifier type="citekey">firdaus-etal-2020-multidm</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.210</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.210</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>2318</start>
<end>2328</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MultiDM-GCN: Aspect-guided Response Generation in Multi-domain Multi-modal Dialogue System using Graph Convolutional Network
%A Firdaus, Mauajama
%A Thakur, Nidhi
%A Ekbal, Asif
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F firdaus-etal-2020-multidm
%X In the recent past, dialogue systems have gained immense popularity and become ubiquitous. During conversations, humans not only rely on language but also seek contextual information from visual content. In every task-oriented dialogue system, the user is guided by the different aspects of a product or service, which steer the conversation towards selecting that product or service. In this work, we present a multi-modal conversational framework for a task-oriented dialogue setup that generates responses following the different aspects of a product or service to cater to the user’s needs. We show that responses guided by aspect information are more interactive and informative, enabling better communication between the agent and the user. We first create a Multi-domain Multi-modal Dialogue (MDMMD) dataset of conversations involving both text and images across three domains: restaurants, electronics, and furniture. We then implement a Graph Convolutional Network (GCN) based framework that generates appropriate textual responses from the multi-modal inputs. The multi-modal information, comprising both textual and image representations, is fed to the decoder along with the aspect information to generate aspect-guided responses. Quantitative and qualitative analyses show that the proposed methodology outperforms several baselines on the proposed task of aspect-guided response generation.
%R 10.18653/v1/2020.findings-emnlp.210
%U https://aclanthology.org/2020.findings-emnlp.210
%U https://doi.org/10.18653/v1/2020.findings-emnlp.210
%P 2318-2328
Markdown (Informal)
[MultiDM-GCN: Aspect-guided Response Generation in Multi-domain Multi-modal Dialogue System using Graph Convolutional Network](https://aclanthology.org/2020.findings-emnlp.210) (Firdaus et al., Findings 2020)
ACL
Mauajama Firdaus, Nidhi Thakur, and Asif Ekbal. 2020. MultiDM-GCN: Aspect-guided Response Generation in Multi-domain Multi-modal Dialogue System using Graph Convolutional Network. In Findings of the Association for Computational Linguistics: EMNLP 2020, pages 2318–2328, Online. Association for Computational Linguistics.
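
The abstract names a Graph Convolutional Network (GCN) as the core of the framework. Purely as an illustrative aid, the sketch below shows a single generic GCN layer in the style of Kipf & Welling (2017), assuming PyTorch. It is not the paper's MultiDM-GCN model, whose architecture this record does not specify, and the class and variable names are hypothetical.

```python
# Illustrative sketch only: one generic graph-convolution layer
# (Kipf & Welling, 2017), NOT the MultiDM-GCN model from the paper.
import torch
import torch.nn as nn

class GCNLayer(nn.Module):
    """One graph-convolution step: H' = ReLU(D^-1/2 (A+I) D^-1/2 H W)."""

    def __init__(self, in_dim: int, out_dim: int):
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim, bias=False)

    def forward(self, h: torch.Tensor, adj: torch.Tensor) -> torch.Tensor:
        # adj: (n, n) binary adjacency matrix; add self-loops first.
        a_hat = adj + torch.eye(adj.size(0), device=adj.device)
        # Symmetric normalization: D^-1/2 * A_hat * D^-1/2.
        d_inv_sqrt = a_hat.sum(dim=1).pow(-0.5)
        a_norm = a_hat * d_inv_sqrt.unsqueeze(1) * d_inv_sqrt.unsqueeze(0)
        # Aggregate neighbor features, then apply the learned projection.
        return torch.relu(a_norm @ self.linear(h))

# Toy usage: a 4-node path graph with 8-dimensional node features.
adj = torch.tensor([[0., 1., 0., 0.],
                    [1., 0., 1., 0.],
                    [0., 1., 0., 1.],
                    [0., 0., 1., 0.]])
h = torch.randn(4, 8)
layer = GCNLayer(8, 16)
print(layer(h, adj).shape)  # torch.Size([4, 16])
```

Stacking a few such layers over a graph built from a dialogue's textual and visual elements would be one plausible way to obtain graph-encoded multi-modal representations like those the abstract describes, but that mapping is an assumption here, not the authors' published design.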