@inproceedings{singhal-etal-2019-learning,
title = "Learning Multilingual Word Embeddings Using Image-Text Data",
author = "Singhal, Karan and
Raman, Karthik and
ten Cate, Balder",
editor = "Bernardi, Raffaella and
Fernandez, Raquel and
Gella, Spandana and
Kafle, Kushal and
Kanan, Christopher and
Lee, Stefan and
Nabi, Moin",
booktitle = "Proceedings of the Second Workshop on Shortcomings in Vision and Language",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-1807",
doi = "10.18653/v1/W19-1807",
pages = "68--77",
abstract = "There has been significant interest recently in learning multilingual word embeddings {--} in which semantically similar words across languages have similar embeddings. State-of-the-art approaches have relied on expensive labeled data, which is unavailable for low-resource languages, or have involved post-hoc unification of monolingual embeddings. In the present paper, we investigate the efficacy of multilingual embeddings learned from weakly-supervised image-text data. In particular, we propose methods for learning multilingual embeddings using image-text data, by enforcing similarity between the representations of the image and that of the text. Our experiments reveal that even without using any expensive labeled data, a bag-of-words-based embedding model trained on image-text data achieves performance comparable to the state-of-the-art on crosslingual semantic similarity tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singhal-etal-2019-learning">
<titleInfo>
<title>Learning Multilingual Word Embeddings Using Image-Text Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karan</namePart>
<namePart type="family">Singhal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karthik</namePart>
<namePart type="family">Raman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Balder</namePart>
<namePart type="family">ten Cate</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Shortcomings in Vision and Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raffaella</namePart>
<namePart type="family">Bernardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kushal</namePart>
<namePart type="family">Kafle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Kanan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moin</namePart>
<namePart type="family">Nabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>There has been significant interest recently in learning multilingual word embeddings – in which semantically similar words across languages have similar embeddings. State-of-the-art approaches have relied on expensive labeled data, which is unavailable for low-resource languages, or have involved post-hoc unification of monolingual embeddings. In the present paper, we investigate the efficacy of multilingual embeddings learned from weakly-supervised image-text data. In particular, we propose methods for learning multilingual embeddings using image-text data, by enforcing similarity between the representations of the image and that of the text. Our experiments reveal that even without using any expensive labeled data, a bag-of-words-based embedding model trained on image-text data achieves performance comparable to the state-of-the-art on crosslingual semantic similarity tasks.</abstract>
<identifier type="citekey">singhal-etal-2019-learning</identifier>
<identifier type="doi">10.18653/v1/W19-1807</identifier>
<location>
<url>https://aclanthology.org/W19-1807</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>68</start>
<end>77</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Multilingual Word Embeddings Using Image-Text Data
%A Singhal, Karan
%A Raman, Karthik
%A ten Cate, Balder
%Y Bernardi, Raffaella
%Y Fernandez, Raquel
%Y Gella, Spandana
%Y Kafle, Kushal
%Y Kanan, Christopher
%Y Lee, Stefan
%Y Nabi, Moin
%S Proceedings of the Second Workshop on Shortcomings in Vision and Language
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F singhal-etal-2019-learning
%X There has been significant interest recently in learning multilingual word embeddings – in which semantically similar words across languages have similar embeddings. State-of-the-art approaches have relied on expensive labeled data, which is unavailable for low-resource languages, or have involved post-hoc unification of monolingual embeddings. In the present paper, we investigate the efficacy of multilingual embeddings learned from weakly-supervised image-text data. In particular, we propose methods for learning multilingual embeddings using image-text data, by enforcing similarity between the representations of the image and that of the text. Our experiments reveal that even without using any expensive labeled data, a bag-of-words-based embedding model trained on image-text data achieves performance comparable to the state-of-the-art on crosslingual semantic similarity tasks.
%R 10.18653/v1/W19-1807
%U https://aclanthology.org/W19-1807
%U https://doi.org/10.18653/v1/W19-1807
%P 68-77
Markdown (Informal)
[Learning Multilingual Word Embeddings Using Image-Text Data](https://aclanthology.org/W19-1807) (Singhal et al., NAACL 2019)
ACL