@inproceedings{pezzelle-fernandez-2019-red,
title = "Is the Red Square Big? {MAL}e{V}i{C}: Modeling Adjectives Leveraging Visual Contexts",
author = "Pezzelle, Sandro and
Fern{\'a}ndez, Raquel",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1285",
doi = "10.18653/v1/D19-1285",
pages = "2865--2876",
abstract = "This work aims at modeling how the meaning of gradable adjectives of size ({`}big{'}, {`}small{'}) can be learned from visually-grounded contexts. Inspired by cognitive and linguistic evidence showing that the use of these expressions relies on setting a threshold that is dependent on a specific context, we investigate the ability of multi-modal models in assessing whether an object is {`}big{'} or {`}small{'} in a given visual scene. In contrast with the standard computational approach that simplistically treats gradable adjectives as {`}fixed{'} attributes, we pose the problem as relational: to be successful, a model has to consider the full visual context. By means of four main tasks, we show that state-of-the-art models (but not a relatively strong baseline) can learn the function subtending the meaning of size adjectives, though their performance is found to decrease while moving from simple to more complex tasks. Crucially, models fail in developing abstract representations of gradable adjectives that can be used compositionally.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pezzelle-fernandez-2019-red">
<titleInfo>
<title>Is the Red Square Big? MALeViC: Modeling Adjectives Leveraging Visual Contexts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sandro</namePart>
<namePart type="family">Pezzelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This work aims at modeling how the meaning of gradable adjectives of size (‘big’, ‘small’) can be learned from visually-grounded contexts. Inspired by cognitive and linguistic evidence showing that the use of these expressions relies on setting a threshold that is dependent on a specific context, we investigate the ability of multi-modal models in assessing whether an object is ‘big’ or ‘small’ in a given visual scene. In contrast with the standard computational approach that simplistically treats gradable adjectives as ‘fixed’ attributes, we pose the problem as relational: to be successful, a model has to consider the full visual context. By means of four main tasks, we show that state-of-the-art models (but not a relatively strong baseline) can learn the function subtending the meaning of size adjectives, though their performance is found to decrease while moving from simple to more complex tasks. Crucially, models fail in developing abstract representations of gradable adjectives that can be used compositionally.</abstract>
<identifier type="citekey">pezzelle-fernandez-2019-red</identifier>
<identifier type="doi">10.18653/v1/D19-1285</identifier>
<location>
<url>https://aclanthology.org/D19-1285</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>2865</start>
<end>2876</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is the Red Square Big? MALeViC: Modeling Adjectives Leveraging Visual Contexts
%A Pezzelle, Sandro
%A Fernández, Raquel
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F pezzelle-fernandez-2019-red
%X This work aims at modeling how the meaning of gradable adjectives of size (‘big’, ‘small’) can be learned from visually-grounded contexts. Inspired by cognitive and linguistic evidence showing that the use of these expressions relies on setting a threshold that is dependent on a specific context, we investigate the ability of multi-modal models in assessing whether an object is ‘big’ or ‘small’ in a given visual scene. In contrast with the standard computational approach that simplistically treats gradable adjectives as ‘fixed’ attributes, we pose the problem as relational: to be successful, a model has to consider the full visual context. By means of four main tasks, we show that state-of-the-art models (but not a relatively strong baseline) can learn the function subtending the meaning of size adjectives, though their performance is found to decrease while moving from simple to more complex tasks. Crucially, models fail in developing abstract representations of gradable adjectives that can be used compositionally.
%R 10.18653/v1/D19-1285
%U https://aclanthology.org/D19-1285
%U https://doi.org/10.18653/v1/D19-1285
%P 2865-2876
Markdown (Informal)
[Is the Red Square Big? MALeViC: Modeling Adjectives Leveraging Visual Contexts](https://aclanthology.org/D19-1285) (Pezzelle & Fernández, EMNLP-IJCNLP 2019)
ACL