@inproceedings{galvan-sosa-etal-2020-seeing,
title = "Seeing the World through Text: Evaluating Image Descriptions for Commonsense Reasoning in Machine Reading Comprehension",
author = "Galvan-Sosa, Diana and
Suzuki, Jun and
Nishida, Kyosuke and
Matsuda, Koji and
Inui, Kentaro",
editor = "Mogadala, Aditya and
Pezzelle, Sandro and
Klakow, Dietrich and
Moens, Marie-Francine and
Akata, Zeynep",
booktitle = "Proceedings of the Second Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)",
month = dec,
year = "2020",
address = "Barcelona, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.lantern-1.3/",
pages = "23--29",
abstract = "Despite recent achievements in natural language understanding, reasoning over commonsense knowledge still represents a big challenge to AI systems. As the name suggests, common sense is related to perception and as such, humans derive it from experience rather than from literary education. Recent works in the NLP and the computer vision field have made the effort of making such knowledge explicit using written language and visual inputs, respectively. Our premise is that the latter source fits better with the characteristics of commonsense acquisition. In this work, we explore to what extent the descriptions of real-world scenes are sufficient to learn common sense about different daily situations, drawing upon visual information to answer script knowledge questions."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="galvan-sosa-etal-2020-seeing">
<titleInfo>
<title>Seeing the World through Text: Evaluating Image Descriptions for Commonsense Reasoning in Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Galvan-Sosa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyosuke</namePart>
<namePart type="family">Nishida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koji</namePart>
<namePart type="family">Matsuda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Mogadala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandro</namePart>
<namePart type="family">Pezzelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeynep</namePart>
<namePart type="family">Akata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite recent achievements in natural language understanding, reasoning over commonsense knowledge still represents a big challenge to AI systems. As the name suggests, common sense is related to perception and as such, humans derive it from experience rather than from literary education. Recent works in the NLP and the computer vision field have made the effort of making such knowledge explicit using written language and visual inputs, respectively. Our premise is that the latter source fits better with the characteristics of commonsense acquisition. In this work, we explore to what extent the descriptions of real-world scenes are sufficient to learn common sense about different daily situations, drawing upon visual information to answer script knowledge questions.</abstract>
<identifier type="citekey">galvan-sosa-etal-2020-seeing</identifier>
<location>
<url>https://aclanthology.org/2020.lantern-1.3/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>23</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Seeing the World through Text: Evaluating Image Descriptions for Commonsense Reasoning in Machine Reading Comprehension
%A Galvan-Sosa, Diana
%A Suzuki, Jun
%A Nishida, Kyosuke
%A Matsuda, Koji
%A Inui, Kentaro
%Y Mogadala, Aditya
%Y Pezzelle, Sandro
%Y Klakow, Dietrich
%Y Moens, Marie-Francine
%Y Akata, Zeynep
%S Proceedings of the Second Workshop on Beyond Vision and LANguage: inTEgrating Real-world kNowledge (LANTERN)
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain
%F galvan-sosa-etal-2020-seeing
%X Despite recent achievements in natural language understanding, reasoning over commonsense knowledge still represents a big challenge to AI systems. As the name suggests, common sense is related to perception and as such, humans derive it from experience rather than from literary education. Recent works in the NLP and the computer vision field have made the effort of making such knowledge explicit using written language and visual inputs, respectively. Our premise is that the latter source fits better with the characteristics of commonsense acquisition. In this work, we explore to what extent the descriptions of real-world scenes are sufficient to learn common sense about different daily situations, drawing upon visual information to answer script knowledge questions.
%U https://aclanthology.org/2020.lantern-1.3/
%P 23-29
Markdown (Informal)
[Seeing the World through Text: Evaluating Image Descriptions for Commonsense Reasoning in Machine Reading Comprehension](https://aclanthology.org/2020.lantern-1.3/) (Galvan-Sosa et al., LANTERN 2020)
ACL