@inproceedings{berlot-attwell-etal-2024-attribute,
title = "Attribute Diversity Determines the Systematicity Gap in {VQA}",
author = "Berlot-Attwell, Ian and
Agrawal, Kumar and
Carrell, Annabelle and
Sharma, Yash and
Saphra, Naomi",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.537",
pages = "9576--9611",
abstract = "Although modern neural networks often generalize to new combinations of familiar concepts, the conditions that enable such compositionality have long been an open question. In this work, we study the systematicity gap in visual question answering: the performance difference between reasoning on previously seen and unseen combinations of object attributes. To test, we introduce a novel diagnostic dataset, CLEVR-HOPE. We find that the systematicity gap is not reduced by increasing the quantity of training data, but is reduced by increasing the diversity of training data. In particular, our experiments suggest that the more distinct attribute type combinations are seen during training, the more systematic we can expect the resulting model to be.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="berlot-attwell-etal-2024-attribute">
<titleInfo>
<title>Attribute Diversity Determines the Systematicity Gap in VQA</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ian</namePart>
<namePart type="family">Berlot-Attwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kumar</namePart>
<namePart type="family">Agrawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annabelle</namePart>
<namePart type="family">Carrell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yash</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Saphra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Although modern neural networks often generalize to new combinations of familiar concepts, the conditions that enable such compositionality have long been an open question. In this work, we study the systematicity gap in visual question answering: the performance difference between reasoning on previously seen and unseen combinations of object attributes. To test, we introduce a novel diagnostic dataset, CLEVR-HOPE. We find that the systematicity gap is not reduced by increasing the quantity of training data, but is reduced by increasing the diversity of training data. In particular, our experiments suggest that the more distinct attribute type combinations are seen during training, the more systematic we can expect the resulting model to be.</abstract>
<identifier type="citekey">berlot-attwell-etal-2024-attribute</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.537</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>9576</start>
<end>9611</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Attribute Diversity Determines the Systematicity Gap in VQA
%A Berlot-Attwell, Ian
%A Agrawal, Kumar
%A Carrell, Annabelle
%A Sharma, Yash
%A Saphra, Naomi
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F berlot-attwell-etal-2024-attribute
%X Although modern neural networks often generalize to new combinations of familiar concepts, the conditions that enable such compositionality have long been an open question. In this work, we study the systematicity gap in visual question answering: the performance difference between reasoning on previously seen and unseen combinations of object attributes. To test, we introduce a novel diagnostic dataset, CLEVR-HOPE. We find that the systematicity gap is not reduced by increasing the quantity of training data, but is reduced by increasing the diversity of training data. In particular, our experiments suggest that the more distinct attribute type combinations are seen during training, the more systematic we can expect the resulting model to be.
%U https://aclanthology.org/2024.emnlp-main.537
%P 9576-9611
Markdown (Informal)
[Attribute Diversity Determines the Systematicity Gap in VQA](https://aclanthology.org/2024.emnlp-main.537) (Berlot-Attwell et al., EMNLP 2024)
ACL
- Ian Berlot-Attwell, Kumar Agrawal, Annabelle Carrell, Yash Sharma, and Naomi Saphra. 2024. Attribute Diversity Determines the Systematicity Gap in VQA. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 9576–9611, Miami, Florida, USA. Association for Computational Linguistics.