@inproceedings{xu-etal-2023-metarevision,
title = "{M}eta{R}e{V}ision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition",
author = "Xu, Guangyue and
Kordjamshidi, Parisa and
Chai, Joyce",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.818",
doi = "10.18653/v1/2023.findings-emnlp.818",
pages = "12224--12236",
abstract = "Humans have the ability to learn novel compositional concepts by recalling primitive concepts acquired from past experience and generalizing these primitive concepts to novel compositions. Inspired by the above human{'}s compositional learning procedure, in this paper, we propose MetaReVision, a retrievalenhanced meta-learning model to solve the visually grounded compositional concept learning problem. The proposed MetaReVision consists of a retrieval module and a meta-learning module which are designed to incorporate retrieved primitive concepts as supporting set to meta-train visual-language models for grounded compositional concept recognition. Through meta-learning from episodes constructed by the retriever, MetaReVision learns a generic compositional representation that can be fast updated to recognize novel composi tional concepts. We create CompCOCO and CompFlickr to benchmark the grounded compositional concept learning. Our experimental results show MetaReVision outperforms other competitive baselines and the retrieval module does plays an important role in this compositional learning process.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2023-metarevision">
<titleInfo>
<title>MetaReVision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Guangyue</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parisa</namePart>
<namePart type="family">Kordjamshidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Humans have the ability to learn novel compositional concepts by recalling primitive concepts acquired from past experience and generalizing these primitive concepts to novel compositions. Inspired by the above human’s compositional learning procedure, in this paper, we propose MetaReVision, a retrievalenhanced meta-learning model to solve the visually grounded compositional concept learning problem. The proposed MetaReVision consists of a retrieval module and a meta-learning module which are designed to incorporate retrieved primitive concepts as supporting set to meta-train visual-language models for grounded compositional concept recognition. Through meta-learning from episodes constructed by the retriever, MetaReVision learns a generic compositional representation that can be fast updated to recognize novel composi tional concepts. We create CompCOCO and CompFlickr to benchmark the grounded compositional concept learning. Our experimental results show MetaReVision outperforms other competitive baselines and the retrieval module does plays an important role in this compositional learning process.</abstract>
<identifier type="citekey">xu-etal-2023-metarevision</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.818</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.818</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>12224</start>
<end>12236</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MetaReVision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition
%A Xu, Guangyue
%A Kordjamshidi, Parisa
%A Chai, Joyce
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F xu-etal-2023-metarevision
%X Humans have the ability to learn novel compositional concepts by recalling primitive concepts acquired from past experience and generalizing these primitive concepts to novel compositions. Inspired by the above human’s compositional learning procedure, in this paper, we propose MetaReVision, a retrievalenhanced meta-learning model to solve the visually grounded compositional concept learning problem. The proposed MetaReVision consists of a retrieval module and a meta-learning module which are designed to incorporate retrieved primitive concepts as supporting set to meta-train visual-language models for grounded compositional concept recognition. Through meta-learning from episodes constructed by the retriever, MetaReVision learns a generic compositional representation that can be fast updated to recognize novel composi tional concepts. We create CompCOCO and CompFlickr to benchmark the grounded compositional concept learning. Our experimental results show MetaReVision outperforms other competitive baselines and the retrieval module does plays an important role in this compositional learning process.
%R 10.18653/v1/2023.findings-emnlp.818
%U https://aclanthology.org/2023.findings-emnlp.818
%U https://doi.org/10.18653/v1/2023.findings-emnlp.818
%P 12224-12236
Markdown (Informal)
[MetaReVision: Meta-Learning with Retrieval for Visually Grounded Compositional Concept Acquisition](https://aclanthology.org/2023.findings-emnlp.818) (Xu et al., Findings 2023)
ACL