@inproceedings{chen-etal-2022-intent,
title = "Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining",
author = "Chen, Minhua and
Jayakumar, Badrinath and
Johnston, Michael and
Mahmoodi, S. Eman and
Pressel, Daniel",
editor = "Loukina, Anastassia and
Gangadharaiah, Rashmi and
Min, Bonan",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-industry.23",
doi = "10.18653/v1/2022.naacl-industry.23",
pages = "197--208",
abstract = "A key challenge in the creation and refinement of virtual assistants is the ability to mine unlabeled utterance data to discover common intents. We develop an approach to this problem that combines large-scale pre-training and multi-task learning to derive a semantic embedding that can be leveraged to identify clusters of utterances that correspond to unhandled intents. An utterance encoder is first trained with a language modeling objective and subsequently adapted to predict intent labels from a large collection of cross-domain enterprise virtual assistant data using a multi-task cosine softmax loss. Experimental evaluation shows significant advantages for this multi-step pre-training approach, with large gains in downstream clustering accuracy on new applications compared to standard sentence embedding approaches. The approach has been incorporated into an interactive discovery tool that enables visualization and exploration of intents by system analysts and builders.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2022-intent">
<titleInfo>
<title>Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minhua</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badrinath</namePart>
<namePart type="family">Jayakumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Johnston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">S</namePart>
<namePart type="given">Eman</namePart>
<namePart type="family">Mahmoodi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Pressel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anastassia</namePart>
<namePart type="family">Loukina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Gangadharaiah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bonan</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A key challenge in the creation and refinement of virtual assistants is the ability to mine unlabeled utterance data to discover common intents. We develop an approach to this problem that combines large-scale pre-training and multi-task learning to derive a semantic embedding that can be leveraged to identify clusters of utterances that correspond to unhandled intents. An utterance encoder is first trained with a language modeling objective and subsequently adapted to predict intent labels from a large collection of cross-domain enterprise virtual assistant data using a multi-task cosine softmax loss. Experimental evaluation shows significant advantages for this multi-step pre-training approach, with large gains in downstream clustering accuracy on new applications compared to standard sentence embedding approaches. The approach has been incorporated into an interactive discovery tool that enables visualization and exploration of intents by system analysts and builders.</abstract>
<identifier type="citekey">chen-etal-2022-intent</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-industry.23</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-industry.23</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>197</start>
<end>208</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining
%A Chen, Minhua
%A Jayakumar, Badrinath
%A Johnston, Michael
%A Mahmoodi, S. Eman
%A Pressel, Daniel
%Y Loukina, Anastassia
%Y Gangadharaiah, Rashmi
%Y Min, Bonan
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F chen-etal-2022-intent
%X A key challenge in the creation and refinement of virtual assistants is the ability to mine unlabeled utterance data to discover common intents. We develop an approach to this problem that combines large-scale pre-training and multi-task learning to derive a semantic embedding that can be leveraged to identify clusters of utterances that correspond to unhandled intents. An utterance encoder is first trained with a language modeling objective and subsequently adapted to predict intent labels from a large collection of cross-domain enterprise virtual assistant data using a multi-task cosine softmax loss. Experimental evaluation shows significant advantages for this multi-step pre-training approach, with large gains in downstream clustering accuracy on new applications compared to standard sentence embedding approaches. The approach has been incorporated into an interactive discovery tool that enables visualization and exploration of intents by system analysts and builders.
%R 10.18653/v1/2022.naacl-industry.23
%U https://aclanthology.org/2022.naacl-industry.23
%U https://doi.org/10.18653/v1/2022.naacl-industry.23
%P 197-208
Markdown (Informal)
[Intent Discovery for Enterprise Virtual Assistants: Applications of Utterance Embedding and Clustering to Intent Mining](https://aclanthology.org/2022.naacl-industry.23) (Chen et al., NAACL 2022)
ACL