@inproceedings{singh-jv-2020-ai,
title = "{AI} {S}ensing for Robotics using Deep Learning based Visual and Language Modeling",
author = "Singh, Yuvaram and
JV, Kameshwar Rao",
editor = "Zadeh, Amir and
Morency, Louis-Philippe and
Liang, Paul Pu and
Poria, Soujanya",
booktitle = "Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)",
month = jul,
year = "2020",
address = "Seattle, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.challengehml-1.8",
doi = "10.18653/v1/2020.challengehml-1.8",
pages = "60--63",
abstract = "An artificial intelligence (AI) system should be capable of processing the sensory inputs to extract both task-specific and general information about its environment. However, most of the existing algorithms extract only task-specific information. In this work, an innovative approach to address the problem of processing visual sensory data is presented by utilizing a convolutional neural network (CNN). It recognizes and represents the physical and semantic nature of the surrounding in both human-readable and machine-processable format. This work utilizes the image captioning model to capture the semantics of the input image and a modular design to generate a probability distribution for semantic topics. It gives any autonomous system the ability to process visual information in a human-like way and generates more insights which are hardly possible with a conventional algorithm. Here a model and data collection method are proposed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-jv-2020-ai">
<titleInfo>
<title>AI Sensing for Robotics using Deep Learning based Visual and Language Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuvaram</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kameshwar</namePart>
<namePart type="given">Rao</namePart>
<namePart type="family">JV</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louis-Philippe</namePart>
<namePart type="family">Morency</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="given">Pu</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soujanya</namePart>
<namePart type="family">Poria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An artificial intelligence (AI) system should be capable of processing the sensory inputs to extract both task-specific and general information about its environment. However, most of the existing algorithms extract only task-specific information. In this work, an innovative approach to address the problem of processing visual sensory data is presented by utilizing a convolutional neural network (CNN). It recognizes and represents the physical and semantic nature of the surrounding in both human-readable and machine-processable format. This work utilizes the image captioning model to capture the semantics of the input image and a modular design to generate a probability distribution for semantic topics. It gives any autonomous system the ability to process visual information in a human-like way and generates more insights which are hardly possible with a conventional algorithm. Here a model and data collection method are proposed.</abstract>
<identifier type="citekey">singh-jv-2020-ai</identifier>
<identifier type="doi">10.18653/v1/2020.challengehml-1.8</identifier>
<location>
<url>https://aclanthology.org/2020.challengehml-1.8</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>60</start>
<end>63</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AI Sensing for Robotics using Deep Learning based Visual and Language Modeling
%A Singh, Yuvaram
%A JV, Kameshwar Rao
%Y Zadeh, Amir
%Y Morency, Louis-Philippe
%Y Liang, Paul Pu
%Y Poria, Soujanya
%S Second Grand-Challenge and Workshop on Multimodal Language (Challenge-HML)
%D 2020
%8 July
%I Association for Computational Linguistics
%C Seattle, USA
%F singh-jv-2020-ai
%X An artificial intelligence (AI) system should be capable of processing the sensory inputs to extract both task-specific and general information about its environment. However, most of the existing algorithms extract only task-specific information. In this work, an innovative approach to address the problem of processing visual sensory data is presented by utilizing a convolutional neural network (CNN). It recognizes and represents the physical and semantic nature of the surrounding in both human-readable and machine-processable format. This work utilizes the image captioning model to capture the semantics of the input image and a modular design to generate a probability distribution for semantic topics. It gives any autonomous system the ability to process visual information in a human-like way and generates more insights which are hardly possible with a conventional algorithm. Here a model and data collection method are proposed.
%R 10.18653/v1/2020.challengehml-1.8
%U https://aclanthology.org/2020.challengehml-1.8
%U https://doi.org/10.18653/v1/2020.challengehml-1.8
%P 60-63
Markdown (Informal)
[AI Sensing for Robotics using Deep Learning based Visual and Language Modeling](https://aclanthology.org/2020.challengehml-1.8) (Singh & JV, Challenge-HML 2020)
ACL