@inproceedings{akula-etal-2024-prism,
title = "{PRISM}: A New Lens for Improved Color Understanding",
author = "Akula, Arjun Reddy and
Pruthi, Garima and
Dhillon, Inderjit S and
Narayana, Pradyumna and
Basu, Sugato and
Jampani, Varun",
editor = "Dernoncourt, Franck and
Preo{\c{t}}iuc-Pietro, Daniel and
Shimorina, Anastasia",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-industry.121",
pages = "1659--1670",
abstract = "While image-text pre-trained models, such as CLIP, have demonstrated impressive capabilities in learning robust text and image representations, a critical area for substantial improvement remains{---}precise color understanding. In this paper, we address this limitation by introducing PRISM, a simple yet highly effective method that extends CLIP{'}s capability to grasp the nuances of precise colors. PRISM seamlessly adapts to both recognized HTML colors and out-of-vocabulary RGB inputs through the utilization of our curated dataset of 100 image-text pairs, which can be effortlessly repurposed for fine-tuning with any desired color. Importantly, PRISM achieves these enhancements without compromising CLIP{'}s performance on established benchmarks. Furthermore, we introduce a novel evaluation framework, ColorLens, featuring both seen and unseen test sets that can be readily repurposed to assess a model{'}s precision in understanding precise colors. Our comprehensive evaluation and results demonstrate significant improvements over baseline models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="akula-etal-2024-prism">
<titleInfo>
<title>PRISM: A New Lens for Improved Color Understanding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arjun</namePart>
<namePart type="given">Reddy</namePart>
<namePart type="family">Akula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Garima</namePart>
<namePart type="family">Pruthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inderjit</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Dhillon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pradyumna</namePart>
<namePart type="family">Narayana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sugato</namePart>
<namePart type="family">Basu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varun</namePart>
<namePart type="family">Jampani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Dernoncourt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoţiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While image-text pre-trained models, such as CLIP, have demonstrated impressive capabilities in learning robust text and image representations, a critical area for substantial improvement remains—precise color understanding. In this paper, we address this limitation by introducing PRISM, a simple yet highly effective method that extends CLIP’s capability to grasp the nuances of precise colors. PRISM seamlessly adapts to both recognized HTML colors and out-of-vocabulary RGB inputs through the utilization of our curated dataset of 100 image-text pairs, which can be effortlessly repurposed for fine-tuning with any desired color. Importantly, PRISM achieves these enhancements without compromising CLIP’s performance on established benchmarks. Furthermore, we introduce a novel evaluation framework, ColorLens, featuring both seen and unseen test sets that can be readily repurposed to assess a model’s precision in understanding precise colors. Our comprehensive evaluation and results demonstrate significant improvements over baseline models.</abstract>
<identifier type="citekey">akula-etal-2024-prism</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-industry.121</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>1659</start>
<end>1670</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PRISM: A New Lens for Improved Color Understanding
%A Akula, Arjun Reddy
%A Pruthi, Garima
%A Dhillon, Inderjit S.
%A Narayana, Pradyumna
%A Basu, Sugato
%A Jampani, Varun
%Y Dernoncourt, Franck
%Y Preoţiuc-Pietro, Daniel
%Y Shimorina, Anastasia
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F akula-etal-2024-prism
%X While image-text pre-trained models, such as CLIP, have demonstrated impressive capabilities in learning robust text and image representations, a critical area for substantial improvement remains—precise color understanding. In this paper, we address this limitation by introducing PRISM, a simple yet highly effective method that extends CLIP’s capability to grasp the nuances of precise colors. PRISM seamlessly adapts to both recognized HTML colors and out-of-vocabulary RGB inputs through the utilization of our curated dataset of 100 image-text pairs, which can be effortlessly repurposed for fine-tuning with any desired color. Importantly, PRISM achieves these enhancements without compromising CLIP’s performance on established benchmarks. Furthermore, we introduce a novel evaluation framework, ColorLens, featuring both seen and unseen test sets that can be readily repurposed to assess a model’s precision in understanding precise colors. Our comprehensive evaluation and results demonstrate significant improvements over baseline models.
%U https://aclanthology.org/2024.emnlp-industry.121
%P 1659-1670
Markdown (Informal)
[PRISM: A New Lens for Improved Color Understanding](https://aclanthology.org/2024.emnlp-industry.121) (Akula et al., EMNLP 2024)
ACL
- Arjun Reddy Akula, Garima Pruthi, Inderjit S Dhillon, Pradyumna Narayana, Sugato Basu, and Varun Jampani. 2024. PRISM: A New Lens for Improved Color Understanding. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 1659–1670, Miami, Florida, US. Association for Computational Linguistics.