@inproceedings{lala-etal-2019-grounded,
    title = "Grounded Word Sense Translation",
    author = "Lala, Chiraag and
      Madhyastha, Pranava and
      Specia, Lucia",
    editor = "Bernardi, Raffaella and
      Fernandez, Raquel and
      Gella, Spandana and
      Kafle, Kushal and
      Kanan, Christopher and
      Lee, Stefan and
      Nabi, Moin",
    booktitle = "Proceedings of the Second Workshop on Shortcomings in Vision and Language",
    month = jun,
    year = "2019",
    address = "Minneapolis, Minnesota",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-1808",
    doi = "10.18653/v1/W19-1808",
    pages = "78--85",
    abstract = "Recent work on visually grounded language learning has focused on broader applications of grounded representations, such as visual question answering and multimodal machine translation. In this paper we consider grounded word sense translation, i.e. the task of correctly translating an ambiguous source word given the corresponding textual and visual context. Our main objective is to investigate the extent to which images help improve word-level (lexical) translation quality. We do so by first studying the dataset for this task to understand the scope and challenges of the task. We then explore different data settings, image features, and ways of grounding to investigate the gain from using images in each of the combinations. We find that grounding on the image is especially beneficial in weaker unidirectional recurrent translation models. We observe that adding structured image information leads to stronger gains in lexical translation accuracy.",
}
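The BibTeX record above can be read programmatically. A minimal sketch, assuming the v1 API of the third-party bibtexparser package (the v2 API differs) and a hypothetical filename lala2019.bib holding the entry:

# Parse the BibTeX entry and pull out a few fields.
# Assumes bibtexparser v1.x (pip install "bibtexparser<2").
import bibtexparser
from bibtexparser.bparser import BibTexParser

with open("lala2019.bib") as f:                 # hypothetical filename
    parser = BibTexParser(common_strings=True)  # resolve month macros like jun
    db = bibtexparser.load(f, parser=parser)

entry = db.entries[0]
print(entry["ID"])      # citekey: lala-etal-2019-grounded
print(entry["title"])   # Grounded Word Sense Translation
print(entry["pages"])   # 78--85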
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lala-etal-2019-grounded">
    <titleInfo>
      <title>Grounded Word Sense Translation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Chiraag</namePart>
      <namePart type="family">Lala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pranava</namePart>
      <namePart type="family">Madhyastha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lucia</namePart>
      <namePart type="family">Specia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Shortcomings in Vision and Language</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Raffaella</namePart>
        <namePart type="family">Bernardi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Raquel</namePart>
        <namePart type="family">Fernandez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Spandana</namePart>
        <namePart type="family">Gella</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kushal</namePart>
        <namePart type="family">Kafle</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christopher</namePart>
        <namePart type="family">Kanan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefan</namePart>
        <namePart type="family">Lee</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Moin</namePart>
        <namePart type="family">Nabi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, Minnesota</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent work on visually grounded language learning has focused on broader applications of grounded representations, such as visual question answering and multimodal machine translation. In this paper we consider grounded word sense translation, i.e. the task of correctly translating an ambiguous source word given the corresponding textual and visual context. Our main objective is to investigate the extent to which images help improve word-level (lexical) translation quality. We do so by first studying the dataset for this task to understand the scope and challenges of the task. We then explore different data settings, image features, and ways of grounding to investigate the gain from using images in each of the combinations. We find that grounding on the image is especially beneficial in weaker unidirectional recurrent translation models. We observe that adding structured image information leads to stronger gains in lexical translation accuracy.</abstract>
    <identifier type="citekey">lala-etal-2019-grounded</identifier>
    <identifier type="doi">10.18653/v1/W19-1808</identifier>
    <location>
      <url>https://aclanthology.org/W19-1808</url>
    </location>
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>78</start>
        <end>85</end>
      </extent>
    </part>
  </mods>
</modsCollection>
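The MODS record is namespaced XML, so any XPath-style lookup must carry the http://www.loc.gov/mods/v3 namespace. A minimal sketch using only the Python standard library, assuming the record has been saved to lala2019.xml (a hypothetical filename):

# Extract the title and author names from the MODS record above.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}
mods = ET.parse("lala2019.xml").getroot().find("m:mods", NS)

title = mods.findtext("m:titleInfo/m:title", namespaces=NS)
# Direct children tagged <name> are the paper's authors; the editors
# sit one level down, inside <relatedItem>, so they are not matched here.
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
]
print(title)    # Grounded Word Sense Translation
print(authors)  # ['Chiraag Lala', 'Pranava Madhyastha', 'Lucia Specia']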
%0 Conference Proceedings
%T Grounded Word Sense Translation
%A Lala, Chiraag
%A Madhyastha, Pranava
%A Specia, Lucia
%Y Bernardi, Raffaella
%Y Fernandez, Raquel
%Y Gella, Spandana
%Y Kafle, Kushal
%Y Kanan, Christopher
%Y Lee, Stefan
%Y Nabi, Moin
%S Proceedings of the Second Workshop on Shortcomings in Vision and Language
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F lala-etal-2019-grounded
%X Recent work on visually grounded language learning has focused on broader applications of grounded representations, such as visual question answering and multimodal machine translation. In this paper we consider grounded word sense translation, i.e. the task of correctly translating an ambiguous source word given the corresponding textual and visual context. Our main objective is to investigate the extent to which images help improve word-level (lexical) translation quality. We do so by first studying the dataset for this task to understand the scope and challenges of the task. We then explore different data settings, image features, and ways of grounding to investigate the gain from using images in each of the combinations. We find that grounding on the image is especially beneficial in weaker unidirectional recurrent translation models. We observe that adding structured image information leads to stronger gains in lexical translation accuracy.
%R 10.18653/v1/W19-1808
%U https://aclanthology.org/W19-1808
%U https://doi.org/10.18653/v1/W19-1808
%P 78-85
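The EndNote/Refer export above is a flat tagged format: each line begins with a percent-sign tag (%T title, %A author, %Y editor, and so on), and repeatable tags simply recur. A minimal stdlib-only parser sketch, assuming the record has been saved to lala2019.enw (a hypothetical filename):

# Collect the %-tagged EndNote lines into a tag -> list-of-values mapping.
record = {}
with open("lala2019.enw") as f:
    for line in f:
        line = line.rstrip("\n")
        if not line.startswith("%"):
            continue
        tag, _, value = line.partition(" ")
        record.setdefault(tag, []).append(value)

print(record["%T"][0])          # Grounded Word Sense Translation
print("; ".join(record["%A"]))  # the three authors, in order
print(record["%P"][0])          # 78-85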
Markdown (Informal)
[Grounded Word Sense Translation](https://aclanthology.org/W19-1808) (Lala et al., NAACL 2019)
ACL
Chiraag Lala, Pranava Madhyastha, and Lucia Specia. 2019. Grounded Word Sense Translation. In Proceedings of the Second Workshop on Shortcomings in Vision and Language, pages 78–85, Minneapolis, Minnesota. Association for Computational Linguistics.