@inproceedings{krishnaswamy-alalyani-2021-embodied,
  title     = {Embodied Multimodal Agents to Bridge the Understanding Gap},
  author    = {Krishnaswamy, Nikhil and
               Alalyani, Nada},
  editor    = {Blodgett, Su Lin and
               Madaio, Michael and
               O'Connor, Brendan and
               Wallach, Hanna and
               Yang, Qian},
  booktitle = {Proceedings of the First Workshop on Bridging Human{--}Computer Interaction and Natural Language Processing},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.hcinlp-1.7},
  pages     = {41--46},
  abstract  = {In this paper we argue that embodied multimodal agents, i.e., avatars, can play an important role in moving natural language processing toward {``}deep understanding.{''} Fully-featured interactive agents, model encounters between two {``}people,{''} but a language-only agent has little environmental and situational awareness. Multimodal agents bring new opportunities for interpreting visuals, locational information, gestures, etc., which are more axes along which to communicate. We propose that multimodal agents, by facilitating an embodied form of human-computer interaction, provide additional structure that can be used to train models that move NLP systems closer to genuine {``}understanding{''} of grounded language, and we discuss ongoing studies using existing systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krishnaswamy-alalyani-2021-embodied">
<titleInfo>
<title>Embodied Multimodal Agents to Bridge the Understanding Gap</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikhil</namePart>
<namePart type="family">Krishnaswamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nada</namePart>
<namePart type="family">Alalyani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Bridging Human–Computer Interaction and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Su</namePart>
<namePart type="given">Lin</namePart>
<namePart type="family">Blodgett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Madaio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Wallach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we argue that embodied multimodal agents, i.e., avatars, can play an important role in moving natural language processing toward “deep understanding.” Fully-featured interactive agents, model encounters between two “people,” but a language-only agent has little environmental and situational awareness. Multimodal agents bring new opportunities for interpreting visuals, locational information, gestures, etc., which are more axes along which to communicate. We propose that multimodal agents, by facilitating an embodied form of human-computer interaction, provide additional structure that can be used to train models that move NLP systems closer to genuine “understanding” of grounded language, and we discuss ongoing studies using existing systems.</abstract>
<identifier type="citekey">krishnaswamy-alalyani-2021-embodied</identifier>
<location>
<url>https://aclanthology.org/2021.hcinlp-1.7</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>41</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Embodied Multimodal Agents to Bridge the Understanding Gap
%A Krishnaswamy, Nikhil
%A Alalyani, Nada
%Y Blodgett, Su Lin
%Y Madaio, Michael
%Y O’Connor, Brendan
%Y Wallach, Hanna
%Y Yang, Qian
%S Proceedings of the First Workshop on Bridging Human–Computer Interaction and Natural Language Processing
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F krishnaswamy-alalyani-2021-embodied
%X In this paper we argue that embodied multimodal agents, i.e., avatars, can play an important role in moving natural language processing toward “deep understanding.” Fully-featured interactive agents, model encounters between two “people,” but a language-only agent has little environmental and situational awareness. Multimodal agents bring new opportunities for interpreting visuals, locational information, gestures, etc., which are more axes along which to communicate. We propose that multimodal agents, by facilitating an embodied form of human-computer interaction, provide additional structure that can be used to train models that move NLP systems closer to genuine “understanding” of grounded language, and we discuss ongoing studies using existing systems.
%U https://aclanthology.org/2021.hcinlp-1.7
%P 41-46
Markdown (Informal)
[Embodied Multimodal Agents to Bridge the Understanding Gap](https://aclanthology.org/2021.hcinlp-1.7) (Krishnaswamy & Alalyani, HCINLP 2021)
ACL