@inproceedings{zarriess-schlangen-2017-deriving,
    title = "Deriving continous grounded meaning representations from referentially structured multimodal contexts",
    author = "Zarrie{\ss}, Sina and
      Schlangen, David",
    editor = "Palmer, Martha and
      Hwa, Rebecca and
      Riedel, Sebastian",
    booktitle = "Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing",
    month = sep,
    year = "2017",
    address = "Copenhagen, Denmark",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/D17-1100",
    doi = "10.18653/v1/D17-1100",
    pages = "959--965",
    abstract = "Corpora of referring expressions paired with their visual referents are a good source for learning word meanings directly grounded in visual representations. Here, we explore additional ways of extracting from them word representations linked to multi-modal context: through expressions that refer to the same object, and through expressions that refer to different objects in the same scene. We show that continuous meaning representations derived from these contexts capture complementary aspects of similarity, even if not outperforming textual embeddings trained on very large amounts of raw text when tested on standard similarity benchmarks. We propose a new task for evaluating grounded meaning representations{---}detection of potentially co-referential phrases{---}and show that it requires precise denotational representations of attribute meanings, which our method provides.",
}
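For programmatic use, here is a minimal sketch of pulling fields out of the BibTeX record above (Python standard library only; an abbreviated copy of the entry is inlined so the snippet is self-contained). This is deliberately not a full BibTeX parser: it reads only simple `key = "value"` pairs and skips unquoted values such as `month = sep`. A dedicated library such as bibtexparser would be the robust choice.

```python
import re

# Abbreviated copy of the record above; a raw string keeps {\ss} intact.
entry = r'''
@inproceedings{zarriess-schlangen-2017-deriving,
    title = "Deriving continous grounded meaning representations from referentially structured multimodal contexts",
    author = "Zarrie{\ss}, Sina and
      Schlangen, David",
    year = "2017",
    pages = "959--965",
}
'''

# Match key = "value" pairs; [^"]* also spans the multi-line author field.
fields = dict(re.findall(r'(\w+)\s*=\s*"([^"]*)"', entry))
print(fields["title"])
print(fields["pages"])  # -> 959--965
```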
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zarriess-schlangen-2017-deriving">
    <titleInfo>
      <title>Deriving continous grounded meaning representations from referentially structured multimodal contexts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Sina</namePart>
      <namePart type="family">Zarrieß</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Schlangen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Martha</namePart>
        <namePart type="family">Palmer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rebecca</namePart>
        <namePart type="family">Hwa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sebastian</namePart>
        <namePart type="family">Riedel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Copenhagen, Denmark</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Corpora of referring expressions paired with their visual referents are a good source for learning word meanings directly grounded in visual representations. Here, we explore additional ways of extracting from them word representations linked to multi-modal context: through expressions that refer to the same object, and through expressions that refer to different objects in the same scene. We show that continuous meaning representations derived from these contexts capture complementary aspects of similarity, even if not outperforming textual embeddings trained on very large amounts of raw text when tested on standard similarity benchmarks. We propose a new task for evaluating grounded meaning representations—detection of potentially co-referential phrases—and show that it requires precise denotational representations of attribute meanings, which our method provides.</abstract>
    <identifier type="citekey">zarriess-schlangen-2017-deriving</identifier>
    <identifier type="doi">10.18653/v1/D17-1100</identifier>
    <location>
      <url>https://aclanthology.org/D17-1100</url>
    </location>
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>959</start>
        <end>965</end>
      </extent>
    </part>
  </mods>
</modsCollection>
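The MODS record parses cleanly with the standard library as well. A minimal sketch, assuming the XML above has been saved to a local file (the filename is hypothetical); all elements live in the http://www.loc.gov/mods/v3 namespace declared on <modsCollection>.

```python
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # MODS v3 namespace
root = ET.parse("zarriess-schlangen-2017-deriving.xml").getroot()  # hypothetical filename

mods = root.find("m:mods", NS)
title = mods.find("m:titleInfo/m:title", NS).text

# Direct children of <mods> are the authors; the editors sit under
# <relatedItem> and are therefore not matched here.
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
    if name.find("m:role/m:roleTerm", NS).text == "author"
]

extent = mods.find("m:part/m:extent", NS)
print(title)
print(authors)  # -> ['Sina Zarrieß', 'David Schlangen']
print(extent.find("m:start", NS).text, "-", extent.find("m:end", NS).text)
```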
%0 Conference Proceedings
%T Deriving continous grounded meaning representations from referentially structured multimodal contexts
%A Zarrieß, Sina
%A Schlangen, David
%Y Palmer, Martha
%Y Hwa, Rebecca
%Y Riedel, Sebastian
%S Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F zarriess-schlangen-2017-deriving
%X Corpora of referring expressions paired with their visual referents are a good source for learning word meanings directly grounded in visual representations. Here, we explore additional ways of extracting from them word representations linked to multi-modal context: through expressions that refer to the same object, and through expressions that refer to different objects in the same scene. We show that continuous meaning representations derived from these contexts capture complementary aspects of similarity, even if not outperforming textual embeddings trained on very large amounts of raw text when tested on standard similarity benchmarks. We propose a new task for evaluating grounded meaning representations—detection of potentially co-referential phrases—and show that it requires precise denotational representations of attribute meanings, which our method provides.
%R 10.18653/v1/D17-1100
%U https://aclanthology.org/D17-1100
%U https://doi.org/10.18653/v1/D17-1100
%P 959-965
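The Endnote/refer record above is line-oriented: each line starts with a percent-sign tag, and tags such as %A and %U may repeat. A minimal sketch, again assuming a hypothetical local filename:

```python
from collections import defaultdict

# Collect tag -> list of values, since %A, %Y, and %U occur more than once.
record = defaultdict(list)
with open("zarriess-schlangen-2017-deriving.enw", encoding="utf-8") as f:  # hypothetical filename
    for line in f:
        if line.startswith("%"):
            tag, _, value = line.partition(" ")
            record[tag].append(value.strip())

print(record["%T"][0])  # title
print(record["%A"])     # -> ['Zarrieß, Sina', 'Schlangen, David']
print(record["%P"][0])  # -> 959-965
```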
Markdown (Informal)
[Deriving continous grounded meaning representations from referentially structured multimodal contexts](https://aclanthology.org/D17-1100) (Zarrieß & Schlangen, EMNLP 2017)
ACL
Sina Zarrieß and David Schlangen. 2017. Deriving continous grounded meaning representations from referentially structured multimodal contexts. In Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pages 959–965, Copenhagen, Denmark. Association for Computational Linguistics.