@inproceedings{sauer-etal-2022-knowledge,
title = "Knowledge Distillation Meets Few-Shot Learning: An Approach for Few-Shot Intent Classification Within and Across Domains",
author = {Sauer, Anna and
Asaadi, Shima and
K{\"u}ch, Fabian},
editor = "Liu, Bing and
Papangelis, Alexandros and
Ultes, Stefan and
Rastogi, Abhinav and
Chen, Yun-Nung and
Spithourakis, Georgios and
Nouri, Elnaz and
Shi, Weiyan",
booktitle = "Proceedings of the 4th Workshop on NLP for Conversational AI",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nlp4convai-1.10",
doi = "10.18653/v1/2022.nlp4convai-1.10",
pages = "108--119",
abstract = "Large Transformer-based natural language understanding models have achieved state-of-the-art performance in dialogue systems. However, scarce labeled data for training, the large model size, and low inference speed hinder their deployment in low-resource scenarios. Few-shot learning and knowledge distillation techniques have been introduced to reduce the need for labeled data and computational resources, respectively. However, these techniques are incompatible because few-shot learning trains models using few data, whereas, knowledge distillation requires sufficient data to train smaller, yet competitive models that run on limited computational resources. In this paper, we address the problem of distilling generalizable small models under the few-shot setting for the intent classification task. Considering in-domain and cross-domain few-shot learning scenarios, we introduce an approach for distilling small models that generalize to new intent classes and domains using only a handful of labeled examples. We conduct experiments on public intent classification benchmarks, and observe a slight performance gap between small models and large Transformer-based models. Overall, our results in both few-shot scenarios confirm the generalization ability of the small distilled models while having lower computational costs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sauer-etal-2022-knowledge">
<titleInfo>
<title>Knowledge Distillation Meets Few-Shot Learning: An Approach for Few-Shot Intent Classification Within and Across Domains</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Sauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shima</namePart>
<namePart type="family">Asaadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabian</namePart>
<namePart type="family">Küch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on NLP for Conversational AI</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandros</namePart>
<namePart type="family">Papangelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Ultes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhinav</namePart>
<namePart type="family">Rastogi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georgios</namePart>
<namePart type="family">Spithourakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elnaz</namePart>
<namePart type="family">Nouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weiyan</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Transformer-based natural language understanding models have achieved state-of-the-art performance in dialogue systems. However, scarce labeled data for training, the large model size, and low inference speed hinder their deployment in low-resource scenarios. Few-shot learning and knowledge distillation techniques have been introduced to reduce the need for labeled data and computational resources, respectively. However, these techniques are incompatible because few-shot learning trains models with very little data, whereas knowledge distillation requires sufficient data to train smaller, yet competitive models that run on limited computational resources. In this paper, we address the problem of distilling generalizable small models under the few-shot setting for the intent classification task. Considering in-domain and cross-domain few-shot learning scenarios, we introduce an approach for distilling small models that generalize to new intent classes and domains using only a handful of labeled examples. We conduct experiments on public intent classification benchmarks and observe only a slight performance gap between the small models and large Transformer-based models. Overall, our results in both few-shot scenarios confirm that the small distilled models generalize well while incurring lower computational costs.</abstract>
<identifier type="citekey">sauer-etal-2022-knowledge</identifier>
<identifier type="doi">10.18653/v1/2022.nlp4convai-1.10</identifier>
<location>
<url>https://aclanthology.org/2022.nlp4convai-1.10</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>108</start>
<end>119</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Knowledge Distillation Meets Few-Shot Learning: An Approach for Few-Shot Intent Classification Within and Across Domains
%A Sauer, Anna
%A Asaadi, Shima
%A Küch, Fabian
%Y Liu, Bing
%Y Papangelis, Alexandros
%Y Ultes, Stefan
%Y Rastogi, Abhinav
%Y Chen, Yun-Nung
%Y Spithourakis, Georgios
%Y Nouri, Elnaz
%Y Shi, Weiyan
%S Proceedings of the 4th Workshop on NLP for Conversational AI
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F sauer-etal-2022-knowledge
%X Large Transformer-based natural language understanding models have achieved state-of-the-art performance in dialogue systems. However, scarce labeled data for training, the large model size, and low inference speed hinder their deployment in low-resource scenarios. Few-shot learning and knowledge distillation techniques have been introduced to reduce the need for labeled data and computational resources, respectively. However, these techniques are incompatible because few-shot learning trains models with very little data, whereas knowledge distillation requires sufficient data to train smaller, yet competitive models that run on limited computational resources. In this paper, we address the problem of distilling generalizable small models under the few-shot setting for the intent classification task. Considering in-domain and cross-domain few-shot learning scenarios, we introduce an approach for distilling small models that generalize to new intent classes and domains using only a handful of labeled examples. We conduct experiments on public intent classification benchmarks and observe only a slight performance gap between the small models and large Transformer-based models. Overall, our results in both few-shot scenarios confirm that the small distilled models generalize well while incurring lower computational costs.
%R 10.18653/v1/2022.nlp4convai-1.10
%U https://aclanthology.org/2022.nlp4convai-1.10
%U https://doi.org/10.18653/v1/2022.nlp4convai-1.10
%P 108-119
Markdown (Informal)
[Knowledge Distillation Meets Few-Shot Learning: An Approach for Few-Shot Intent Classification Within and Across Domains](https://aclanthology.org/2022.nlp4convai-1.10) (Sauer et al., NLP4ConvAI 2022)
ACL
Anna Sauer, Shima Asaadi, and Fabian Küch. 2022. Knowledge Distillation Meets Few-Shot Learning: An Approach for Few-Shot Intent Classification Within and Across Domains. In Proceedings of the 4th Workshop on NLP for Conversational AI, pages 108–119, Dublin, Ireland. Association for Computational Linguistics.
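
For context on the knowledge-distillation setting the abstract refers to, below is a minimal, generic sketch of a teacher-student distillation objective in PyTorch: a temperature-scaled KL term against the teacher's logits blended with standard cross-entropy on the (few) labeled examples. This is not the authors' method from the paper; the function name, temperature, and weighting are illustrative assumptions.

```python
# Generic knowledge-distillation loss sketch (PyTorch); illustrative only,
# not the approach proposed in Sauer et al. (2022).
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      temperature=2.0, alpha=0.5):
    """Blend a soft teacher-matching term with the hard-label loss."""
    # Soft targets: KL divergence between temperature-scaled distributions.
    soft = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * (temperature ** 2)
    # Hard targets: standard cross-entropy on the labeled examples.
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1.0 - alpha) * hard

# Example usage with random tensors (batch of 4, 10 intent classes).
student_logits = torch.randn(4, 10)
teacher_logits = torch.randn(4, 10)
labels = torch.randint(0, 10, (4,))
print(distillation_loss(student_logits, teacher_logits, labels))
```

Scaling the KL term by temperature squared keeps the gradient magnitude of the soft-target term roughly comparable across temperatures, a common convention in distillation implementations.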