BibTeX

@inproceedings{ahuja-desai-2020-accelerating,
title = "Accelerating Natural Language Understanding in Task-Oriented Dialog",
author = "Ahuja, Ojas and
Desai, Shrey",
editor = "Wen, Tsung-Hsien and
Celikyilmaz, Asli and
Yu, Zhou and
Papangelis, Alexandros and
Eric, Mihail and
Kumar, Anuj and
Casanueva, I{\~n}igo and
Shah, Rushin",
booktitle = "Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlp4convai-1.6",
doi = "10.18653/v1/2020.nlp4convai-1.6",
pages = "46--53",
abstract = "Task-oriented dialog models typically leverage complex neural architectures and large-scale, pre-trained Transformers to achieve state-of-the-art performance on popular natural language understanding benchmarks. However, these models frequently have in excess of tens of millions of parameters, making them impossible to deploy on-device where resource-efficiency is a major concern. In this work, we show that a simple convolutional model compressed with structured pruning achieves largely comparable results to BERT on ATIS and Snips, with under 100K parameters. Moreover, we perform acceleration experiments on CPUs, where we observe our multi-task model predicts intents and slots nearly 63x faster than even DistilBERT.",
}
MODS XML

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ahuja-desai-2020-accelerating">
    <titleInfo>
      <title>Accelerating Natural Language Understanding in Task-Oriented Dialog</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ojas</namePart>
      <namePart type="family">Ahuja</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shrey</namePart>
      <namePart type="family">Desai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tsung-Hsien</namePart>
        <namePart type="family">Wen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asli</namePart>
        <namePart type="family">Celikyilmaz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhou</namePart>
        <namePart type="family">Yu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alexandros</namePart>
        <namePart type="family">Papangelis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mihail</namePart>
        <namePart type="family">Eric</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anuj</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iñigo</namePart>
        <namePart type="family">Casanueva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rushin</namePart>
        <namePart type="family">Shah</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Task-oriented dialog models typically leverage complex neural architectures and large-scale, pre-trained Transformers to achieve state-of-the-art performance on popular natural language understanding benchmarks. However, these models frequently have in excess of tens of millions of parameters, making them impossible to deploy on-device where resource-efficiency is a major concern. In this work, we show that a simple convolutional model compressed with structured pruning achieves largely comparable results to BERT on ATIS and Snips, with under 100K parameters. Moreover, we perform acceleration experiments on CPUs, where we observe our multi-task model predicts intents and slots nearly 63x faster than even DistilBERT.</abstract>
    <identifier type="citekey">ahuja-desai-2020-accelerating</identifier>
    <identifier type="doi">10.18653/v1/2020.nlp4convai-1.6</identifier>
    <location>
      <url>https://aclanthology.org/2020.nlp4convai-1.6</url>
    </location>
    <part>
      <date>2020-07</date>
      <extent unit="page">
        <start>46</start>
        <end>53</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote

%0 Conference Proceedings
%T Accelerating Natural Language Understanding in Task-Oriented Dialog
%A Ahuja, Ojas
%A Desai, Shrey
%Y Wen, Tsung-Hsien
%Y Celikyilmaz, Asli
%Y Yu, Zhou
%Y Papangelis, Alexandros
%Y Eric, Mihail
%Y Kumar, Anuj
%Y Casanueva, Iñigo
%Y Shah, Rushin
%S Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F ahuja-desai-2020-accelerating
%X Task-oriented dialog models typically leverage complex neural architectures and large-scale, pre-trained Transformers to achieve state-of-the-art performance on popular natural language understanding benchmarks. However, these models frequently have in excess of tens of millions of parameters, making them impossible to deploy on-device where resource-efficiency is a major concern. In this work, we show that a simple convolutional model compressed with structured pruning achieves largely comparable results to BERT on ATIS and Snips, with under 100K parameters. Moreover, we perform acceleration experiments on CPUs, where we observe our multi-task model predicts intents and slots nearly 63x faster than even DistilBERT.
%R 10.18653/v1/2020.nlp4convai-1.6
%U https://aclanthology.org/2020.nlp4convai-1.6
%U https://doi.org/10.18653/v1/2020.nlp4convai-1.6
%P 46-53
Markdown (Informal)

[Accelerating Natural Language Understanding in Task-Oriented Dialog](https://aclanthology.org/2020.nlp4convai-1.6) (Ahuja & Desai, NLP4ConvAI 2020)

ACL

Ojas Ahuja and Shrey Desai. 2020. Accelerating Natural Language Understanding in Task-Oriented Dialog. In Proceedings of the 2nd Workshop on Natural Language Processing for Conversational AI, pages 46–53, Online. Association for Computational Linguistics.
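As a rough illustration of the compression idea the abstract describes, below is a minimal, hypothetical PyTorch sketch of structured (whole-filter) channel pruning applied to a small convolutional intent/slot model. The model (TinyNLU), the helper (prune_conv_channels), and all sizes are invented for illustration and are not the authors' implementation; the point is only that dropping entire conv filters shrinks the downstream dense weights, which is what makes a structurally pruned model faster on CPUs without sparse kernels.

import torch
import torch.nn as nn

class TinyNLU(nn.Module):
    """Toy multi-task NLU model: one 1-D conv encoder feeding an
    utterance-level intent head and a token-level slot head."""
    def __init__(self, vocab_size=1000, embed_dim=64, channels=128,
                 num_intents=7, num_slots=20):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.conv = nn.Conv1d(embed_dim, channels, kernel_size=3, padding=1)
        self.intent_head = nn.Linear(channels, num_intents)  # per utterance
        self.slot_head = nn.Linear(channels, num_slots)      # per token

    def forward(self, token_ids):
        x = self.embed(token_ids).transpose(1, 2)   # (B, E, T)
        h = torch.relu(self.conv(x))                # (B, C, T)
        intent_logits = self.intent_head(h.mean(dim=2))      # (B, intents)
        slot_logits = self.slot_head(h.transpose(1, 2))      # (B, T, slots)
        return intent_logits, slot_logits

def prune_conv_channels(model, keep_ratio=0.5):
    """Drop whole conv filters with the smallest L1 norms, then shrink the
    heads to match. Removing entire channels (the 'structured' part) keeps
    all remaining tensors dense, so the smaller model runs faster as-is."""
    w = model.conv.weight.data                      # (C, E, K)
    scores = w.abs().sum(dim=(1, 2))                # L1 norm per filter
    k = max(1, int(keep_ratio * w.size(0)))
    keep = scores.topk(k).indices.sort().values     # indices of kept filters

    new_conv = nn.Conv1d(w.size(1), k, kernel_size=w.size(2), padding=1)
    new_conv.weight.data = w[keep].clone()
    new_conv.bias.data = model.conv.bias.data[keep].clone()
    model.conv = new_conv

    for name in ("intent_head", "slot_head"):
        old = getattr(model, name)
        new = nn.Linear(k, old.out_features)
        new.weight.data = old.weight.data[:, keep].clone()  # drop pruned inputs
        new.bias.data = old.bias.data.clone()
        setattr(model, name, new)
    return model

model = prune_conv_channels(TinyNLU(), keep_ratio=0.25)
intent_logits, slot_logits = model(torch.randint(0, 1000, (2, 12)))
print(intent_logits.shape, slot_logits.shape)  # (2, 7) and (2, 12, 20)

In practice one would fine-tune after pruning to recover accuracy; the paper's reported results (near-BERT quality under 100K parameters) come from its own training and pruning setup, not from this sketch.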