@inproceedings{stephan-etal-2024-text-guided,
title = "Text-Guided Alternative Image Clustering",
author = "Stephan, Andreas and
Miklautz, Lukas and
Leiber, Collin and
Luz De Araujo, Pedro Henrique and
R{\'e}p{\'a}s, Dominik and
Plant, Claudia and
Roth, Benjamin",
editor = "Zhao, Chen and
Mosbach, Marius and
Atanasova, Pepa and
Goldfarb-Tarrent, Seraphina and
Hase, Peter and
Hosseini, Arian and
Elbayad, Maha and
Pezzelle, Sandro and
Mozes, Maximilian",
booktitle = "Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.repl4nlp-1.13",
pages = "177--190",
abstract = "Traditional image clustering techniques only find a single grouping within visual data. In particular, they do not provide a possibility to explicitly define multiple types of clustering. This work explores the potential of large vision-language models to facilitate alternative image clustering. We propose Text-Guided Alternative Image Consensus Clustering (TGAICC), a novel approach that leverages user-specified interests via prompts to guide the discovery of diverse clusterings. To achieve this, it generates a clustering for each prompt, groups them using hierarchical clustering, and then aggregates them using consensus clustering. TGAICC outperforms image- and text-based baselines on four alternative image clustering benchmark datasets. Furthermore, using count-based word statistics, we are able to obtain text-based explanations of the alternative clusterings. In conclusion, our research illustrates how contemporary large vision-language models can transform explanatory data analysis, enabling the generation of insightful, customizable, and diverse image clusterings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stephan-etal-2024-text-guided">
<titleInfo>
<title>Text-Guided Alternative Image Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Stephan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lukas</namePart>
<namePart type="family">Miklautz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Collin</namePart>
<namePart type="family">Leiber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="given">Henrique</namePart>
<namePart type="family">Luz De Araujo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dominik</namePart>
<namePart type="family">Répás</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Plant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marius</namePart>
<namePart type="family">Mosbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pepa</namePart>
<namePart type="family">Atanasova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seraphina</namePart>
<namePart type="family">Goldfarb-Tarrent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Hase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arian</namePart>
<namePart type="family">Hosseini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maha</namePart>
<namePart type="family">Elbayad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandro</namePart>
<namePart type="family">Pezzelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Mozes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Traditional image clustering techniques only find a single grouping within visual data. In particular, they do not provide a possibility to explicitly define multiple types of clustering. This work explores the potential of large vision-language models to facilitate alternative image clustering. We propose Text-Guided Alternative Image Consensus Clustering (TGAICC), a novel approach that leverages user-specified interests via prompts to guide the discovery of diverse clusterings. To achieve this, it generates a clustering for each prompt, groups them using hierarchical clustering, and then aggregates them using consensus clustering. TGAICC outperforms image- and text-based baselines on four alternative image clustering benchmark datasets. Furthermore, using count-based word statistics, we are able to obtain text-based explanations of the alternative clusterings. In conclusion, our research illustrates how contemporary large vision-language models can transform explanatory data analysis, enabling the generation of insightful, customizable, and diverse image clusterings.</abstract>
<identifier type="citekey">stephan-etal-2024-text-guided</identifier>
<location>
<url>https://aclanthology.org/2024.repl4nlp-1.13</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>177</start>
<end>190</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text-Guided Alternative Image Clustering
%A Stephan, Andreas
%A Miklautz, Lukas
%A Leiber, Collin
%A Luz De Araujo, Pedro Henrique
%A Répás, Dominik
%A Plant, Claudia
%A Roth, Benjamin
%Y Zhao, Chen
%Y Mosbach, Marius
%Y Atanasova, Pepa
%Y Goldfarb-Tarrent, Seraphina
%Y Hase, Peter
%Y Hosseini, Arian
%Y Elbayad, Maha
%Y Pezzelle, Sandro
%Y Mozes, Maximilian
%S Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F stephan-etal-2024-text-guided
%X Traditional image clustering techniques only find a single grouping within visual data. In particular, they do not provide a possibility to explicitly define multiple types of clustering. This work explores the potential of large vision-language models to facilitate alternative image clustering. We propose Text-Guided Alternative Image Consensus Clustering (TGAICC), a novel approach that leverages user-specified interests via prompts to guide the discovery of diverse clusterings. To achieve this, it generates a clustering for each prompt, groups them using hierarchical clustering, and then aggregates them using consensus clustering. TGAICC outperforms image- and text-based baselines on four alternative image clustering benchmark datasets. Furthermore, using count-based word statistics, we are able to obtain text-based explanations of the alternative clusterings. In conclusion, our research illustrates how contemporary large vision-language models can transform explanatory data analysis, enabling the generation of insightful, customizable, and diverse image clusterings.
%U https://aclanthology.org/2024.repl4nlp-1.13
%P 177-190
Markdown (Informal)
[Text-Guided Alternative Image Clustering](https://aclanthology.org/2024.repl4nlp-1.13) (Stephan et al., RepL4NLP-WS 2024)
ACL
- Andreas Stephan, Lukas Miklautz, Collin Leiber, Pedro Henrique Luz De Araujo, Dominik Répás, Claudia Plant, and Benjamin Roth. 2024. Text-Guided Alternative Image Clustering. In Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024), pages 177–190, Bangkok, Thailand. Association for Computational Linguistics.