@inproceedings{pattnaik-etal-2025-scalable,
title = "Scalable and Cost Effective High-Cardinality Classification with {LLM}s via Multi-View Label Representations and Retrieval Augmentation",
author = "Pattnaik, Anup and
Vutla, Sasanka and
Dev, Hamvir and
Nandan, Jeevesh and
George, Cijo",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.137/",
doi = "10.18653/v1/2025.emnlp-industry.137",
pages = "1955--1969",
ISBN = "979-8-89176-333-3",
abstract = "Classifying contact center interactions into a large number of categories is critical for downstream analytics, but challenging due to high label cardinality, and cost constraints. While Large Language Models (LLMs) offer flexibility for such tasks, existing methods degrade with increasing label space, showing significant inconsistencies and sensitivity to label ordering. We propose a scalable, cost-effective two-step retrieval-augmented classification framework, enhanced with a multi-view representation of labels. Our method significantly improves accuracy and consistency over baseline LLM approaches. Experiments across 4 private and 5 open datasets yield performance improvements of upto 14.6{\%} while reducing inference cost by 60-91{\%} compared to baseline approaches."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pattnaik-etal-2025-scalable">
<titleInfo>
<title>Scalable and Cost Effective High-Cardinality Classification with LLMs via Multi-View Label Representations and Retrieval Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anup</namePart>
<namePart type="family">Pattnaik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sasanka</namePart>
<namePart type="family">Vutla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamvir</namePart>
<namePart type="family">Dev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeevesh</namePart>
<namePart type="family">Nandan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cijo</namePart>
<namePart type="family">George</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>Classifying contact center interactions into a large number of categories is critical for downstream analytics, but challenging due to high label cardinality, and cost constraints. While Large Language Models (LLMs) offer flexibility for such tasks, existing methods degrade with increasing label space, showing significant inconsistencies and sensitivity to label ordering. We propose a scalable, cost-effective two-step retrieval-augmented classification framework, enhanced with a multi-view representation of labels. Our method significantly improves accuracy and consistency over baseline LLM approaches. Experiments across 4 private and 5 open datasets yield performance improvements of upto 14.6% while reducing inference cost by 60-91% compared to baseline approaches.</abstract>
<identifier type="citekey">pattnaik-etal-2025-scalable</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-industry.137</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.137/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>1955</start>
<end>1969</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Scalable and Cost Effective High-Cardinality Classification with LLMs via Multi-View Label Representations and Retrieval Augmentation
%A Pattnaik, Anup
%A Vutla, Sasanka
%A Dev, Hamvir
%A Nandan, Jeevesh
%A George, Cijo
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F pattnaik-etal-2025-scalable
%X Classifying contact center interactions into a large number of categories is critical for downstream analytics, but challenging due to high label cardinality, and cost constraints. While Large Language Models (LLMs) offer flexibility for such tasks, existing methods degrade with increasing label space, showing significant inconsistencies and sensitivity to label ordering. We propose a scalable, cost-effective two-step retrieval-augmented classification framework, enhanced with a multi-view representation of labels. Our method significantly improves accuracy and consistency over baseline LLM approaches. Experiments across 4 private and 5 open datasets yield performance improvements of upto 14.6% while reducing inference cost by 60-91% compared to baseline approaches.
%R 10.18653/v1/2025.emnlp-industry.137
%U https://aclanthology.org/2025.emnlp-industry.137/
%U https://doi.org/10.18653/v1/2025.emnlp-industry.137
%P 1955-1969
Markdown (Informal)
[Scalable and Cost Effective High-Cardinality Classification with LLMs via Multi-View Label Representations and Retrieval Augmentation](https://aclanthology.org/2025.emnlp-industry.137/) (Pattnaik et al., EMNLP 2025)
ACL