% BibTeX record for the paper published at https://aclanthology.org/2026.customnlp4u-1.8/
% Acronyms in title/booktitle are brace-protected as whole words so that
% sentence-casing bibliography styles do not lowercase them.
@inproceedings{xie-etal-2026-building,
    title     = {Building Multi-turn Intent Classification with {LLM}-based Labeling},
    author    = {Xie, Biancen and
                 Bian, Kaiqi and
                 Gusain, Jai Ranjan Singh and
                 Ramanathan, Manikandarajan and
                 Maragoud, Raj},
    editor    = {Mysore, Sheshera and
                 Kumar, Sachin and
                 Balachandran, Vidhisha and
                 Hayati, Shirley Anugrah and
                 Brahman, Faeze and
                 Moussa, Hanane Nour and
                 Salemi, Alireza},
    booktitle = {Proceedings of the Second Workshop on Customizable {NLP}: Progress and Challenges in Customizing {NLP} for a Domain, Application, Group, or Individual ({CustomNLP4U})},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.customnlp4u-1.8/},
    pages     = {68--83},
    isbn      = {979-8-89176-396-8},
    abstract  = {Intent classification is essential for customer service routing, connecting customers to the appropriate agents and reducing handling time and operational cost. Developing a real-world multi-turn intent classification system is challenging due to complex intent taxonomies, dynamic intent switching within conversations, and limited labeled training data. To address these challenges, we propose a scalable multi-turn intent classification framework for ecommerce customer service that models intent along multiple dimensions. We introduce LLM-based labeling strategies to annotate real customer transcripts at scale and augment training with LLM-simulated multi-turn dialogues that expand coverage of topic and intent switches, which are rare in existing transcripts. Through extensive experiments, we find that explanation-guided labeling with a self-critique step produces the most accurate training labels. Fine-tuned models built on a RoBERTa backbone outperform zero-shot LLM prompting while achieving substantially lower inference latency. Finally, we show that a hybrid approach that combines the fine-tuned classifier with LLM prompting further improves accuracy over either component alone. Overall, our results provide practical guidance for building and deploying high-accuracy, low-latency, large-scale multi-turn intent classification systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey xie-etal-2026-building: the paper itself, with the
     host proceedings volume described in the nested relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="xie-etal-2026-building">
    <titleInfo>
      <title>Building Multi-turn Intent Classification with LLM-based Labeling</title>
    </titleInfo>
    <!-- Paper authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Biancen</namePart>
      <namePart type="family">Xie</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kaiqi</namePart>
      <namePart type="family">Bian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jai</namePart>
      <namePart type="given">Ranjan</namePart>
      <namePart type="given">Singh</namePart>
      <namePart type="family">Gusain</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Manikandarajan</namePart>
      <namePart type="family">Ramanathan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raj</namePart>
      <namePart type="family">Maragoud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sheshera</namePart>
        <namePart type="family">Mysore</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sachin</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vidhisha</namePart>
        <namePart type="family">Balachandran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shirley</namePart>
        <namePart type="given">Anugrah</namePart>
        <namePart type="family">Hayati</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Faeze</namePart>
        <namePart type="family">Brahman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hanane</namePart>
        <namePart type="given">Nour</namePart>
        <namePart type="family">Moussa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alireza</namePart>
        <namePart type="family">Salemi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-396-8</identifier>
    </relatedItem>
    <abstract>Intent classification is essential for customer service routing, connecting customers to the appropriate agents and reducing handling time and operational cost. Developing a real-world multi-turn intent classification system is challenging due to complex intent taxonomies, dynamic intent switching within conversations, and limited labeled training data. To address these challenges, we propose a scalable multi-turn intent classification framework for ecommerce customer service that models intent along multiple dimensions. We introduce LLM-based labeling strategies to annotate real customer transcripts at scale and augment training with LLM-simulated multi-turn dialogues that expand coverage of topic and intent switches, which are rare in existing transcripts. Through extensive experiments, we find that explanation-guided labeling with a self-critique step produces the most accurate training labels. Fine-tuned models built on a RoBERTa backbone outperform zero-shot LLM prompting while achieving substantially lower inference latency. Finally, we show that a hybrid approach that combines the fine-tuned classifier with LLM prompting further improves accuracy over either component alone. Overall, our results provide practical guidance for building and deploying high-accuracy, low-latency, large-scale multi-turn intent classification systems.</abstract>
    <identifier type="citekey">xie-etal-2026-building</identifier>
    <location>
      <url>https://aclanthology.org/2026.customnlp4u-1.8/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>68</start>
        <end>83</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Building Multi-turn Intent Classification with LLM-based Labeling
%A Xie, Biancen
%A Bian, Kaiqi
%A Gusain, Jai Ranjan Singh
%A Ramanathan, Manikandarajan
%A Maragoud, Raj
%Y Mysore, Sheshera
%Y Kumar, Sachin
%Y Balachandran, Vidhisha
%Y Hayati, Shirley Anugrah
%Y Brahman, Faeze
%Y Moussa, Hanane Nour
%Y Salemi, Alireza
%S Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-396-8
%F xie-etal-2026-building
%X Intent classification is essential for customer service routing, connecting customers to the appropriate agents and reducing handling time and operational cost. Developing a real-world multi-turn intent classification system is challenging due to complex intent taxonomies, dynamic intent switching within conversations, and limited labeled training data. To address these challenges, we propose a scalable multi-turn intent classification framework for ecommerce customer service that models intent along multiple dimensions. We introduce LLM-based labeling strategies to annotate real customer transcripts at scale and augment training with LLM-simulated multi-turn dialogues that expand coverage of topic and intent switches, which are rare in existing transcripts. Through extensive experiments, we find that explanation-guided labeling with a self-critique step produces the most accurate training labels. Fine-tuned models built on a RoBERTa backbone outperform zero-shot LLM prompting while achieving substantially lower inference latency. Finally, we show that a hybrid approach that combines the fine-tuned classifier with LLM prompting further improves accuracy over either component alone. Overall, our results provide practical guidance for building and deploying high-accuracy, low-latency, large-scale multi-turn intent classification systems.
%U https://aclanthology.org/2026.customnlp4u-1.8/
%P 68-83
Markdown (Informal)
[Building Multi-turn Intent Classification with LLM-based Labeling](https://aclanthology.org/2026.customnlp4u-1.8/) (Xie et al., CustomNLP4U 2026)
ACL
- Biancen Xie, Kaiqi Bian, Jai Ranjan Singh Gusain, Manikandarajan Ramanathan, and Raj Maragoud. 2026. Building Multi-turn Intent Classification with LLM-based Labeling. In Proceedings of the Second Workshop on Customizable NLP: Progress and Challenges in Customizing NLP for a Domain, Application, Group, or Individual (CustomNLP4U), pages 68–83, San Diego, California, USA. Association for Computational Linguistics.