@inproceedings{pattnaik-etal-2024-improving,
title = "Improving Hierarchical Text Clustering with {LLM}-guided Multi-view Cluster Representation",
author = "Pattnaik, Anup and
George, Cijo and
Tripathi, Rishabh Kumar and
Vutla, Sasanka and
Vepa, Jithendra",
editor = "Dernoncourt, Franck and
Preo{\c{t}}iuc-Pietro, Daniel and
Shimorina, Anastasia",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-industry.54",
pages = "719--727",
abstract = "In this work, we present an approach that introduces different perspectives or views to improve the quality of hierarchical clustering of interaction drivers in a contact center. Specifically, we present a multi-stage approach that introduces LLM-guided multi-view cluster representation that significantly improves the quality of generated clusters. Our approach improves average Silhouette Score by upto 70{\%} and Human Preference Scores by 36.7{\%} for top-level clusters compared to standard agglomerative clustering for the given business use-case. We also present how the proposed approach can be adapted to cater to a standard non-hierarchical clustering use-cases where it achieves state-of-the-art performance on public datasets based on NMI and ACC scores, with minimal number of LLM queries compared to the current state-of-the-art approaches. Moreover, we apply our technique to generate two new labeled datasets for hierarchical clustering. We open-source these labeled datasets, validated and corrected by domain experts, for the benefit of the research community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pattnaik-etal-2024-improving">
<titleInfo>
<title>Improving Hierarchical Text Clustering with LLM-guided Multi-view Cluster Representation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anup</namePart>
<namePart type="family">Pattnaik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cijo</namePart>
<namePart type="family">George</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Tripathi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sasanka</namePart>
<namePart type="family">Vutla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jithendra</namePart>
<namePart type="family">Vepa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Dernoncourt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoţiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we present an approach that introduces different perspectives or views to improve the quality of hierarchical clustering of interaction drivers in a contact center. Specifically, we present a multi-stage approach that introduces LLM-guided multi-view cluster representation that significantly improves the quality of generated clusters. Our approach improves average Silhouette Score by upto 70% and Human Preference Scores by 36.7% for top-level clusters compared to standard agglomerative clustering for the given business use-case. We also present how the proposed approach can be adapted to cater to a standard non-hierarchical clustering use-cases where it achieves state-of-the-art performance on public datasets based on NMI and ACC scores, with minimal number of LLM queries compared to the current state-of-the-art approaches. Moreover, we apply our technique to generate two new labeled datasets for hierarchical clustering. We open-source these labeled datasets, validated and corrected by domain experts, for the benefit of the research community.</abstract>
<identifier type="citekey">pattnaik-etal-2024-improving</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-industry.54</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>719</start>
<end>727</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Hierarchical Text Clustering with LLM-guided Multi-view Cluster Representation
%A Pattnaik, Anup
%A George, Cijo
%A Tripathi, Rishabh Kumar
%A Vutla, Sasanka
%A Vepa, Jithendra
%Y Dernoncourt, Franck
%Y Preoţiuc-Pietro, Daniel
%Y Shimorina, Anastasia
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F pattnaik-etal-2024-improving
%X In this work, we present an approach that introduces different perspectives or views to improve the quality of hierarchical clustering of interaction drivers in a contact center. Specifically, we present a multi-stage approach that introduces LLM-guided multi-view cluster representation that significantly improves the quality of generated clusters. Our approach improves average Silhouette Score by upto 70% and Human Preference Scores by 36.7% for top-level clusters compared to standard agglomerative clustering for the given business use-case. We also present how the proposed approach can be adapted to cater to a standard non-hierarchical clustering use-cases where it achieves state-of-the-art performance on public datasets based on NMI and ACC scores, with minimal number of LLM queries compared to the current state-of-the-art approaches. Moreover, we apply our technique to generate two new labeled datasets for hierarchical clustering. We open-source these labeled datasets, validated and corrected by domain experts, for the benefit of the research community.
%U https://aclanthology.org/2024.emnlp-industry.54
%P 719-727
Markdown (Informal)
[Improving Hierarchical Text Clustering with LLM-guided Multi-view Cluster Representation](https://aclanthology.org/2024.emnlp-industry.54) (Pattnaik et al., EMNLP 2024)
ACL