@inproceedings{ravi-kozareva-2019-device,
title = "On-device Structured and Context Partitioned Projection Networks",
author = "Ravi, Sujith and
Kozareva, Zornitsa",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1368/",
doi = "10.18653/v1/P19-1368",
pages = "3784--3793",
abstract = "A challenging problem in on-device text classification is to build highly accurate neural models that can fit in small memory footprint and have low latency. To address this challenge, we propose an on-device neural network SGNN++ which dynamically learns compact projection vectors from raw text using structured and context-dependent partition projections. We show that this results in accelerated inference and performance improvements. We conduct extensive evaluation on multiple conversational tasks and languages such as English, Japanese, Spanish and French. Our SGNN++ model significantly outperforms all baselines, improves upon existing on-device neural models and even surpasses RNN, CNN and BiLSTM models on dialog act and intent prediction. Through a series of ablation studies we show the impact of the partitioned projections and structured information leading to 10{\%} improvement. We study the impact of the model size on accuracy and introduce quantization-aware training for SGNN++ to further reduce the model size while preserving the same quality. Finally, we show fast inference on mobile phones."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ravi-kozareva-2019-device">
<titleInfo>
<title>On-device Structured and Context Partitioned Projection Networks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A challenging problem in on-device text classification is to build highly accurate neural models that can fit in small memory footprint and have low latency. To address this challenge, we propose an on-device neural network SGNN++ which dynamically learns compact projection vectors from raw text using structured and context-dependent partition projections. We show that this results in accelerated inference and performance improvements. We conduct extensive evaluation on multiple conversational tasks and languages such as English, Japanese, Spanish and French. Our SGNN++ model significantly outperforms all baselines, improves upon existing on-device neural models and even surpasses RNN, CNN and BiLSTM models on dialog act and intent prediction. Through a series of ablation studies we show the impact of the partitioned projections and structured information leading to 10% improvement. We study the impact of the model size on accuracy and introduce quantization-aware training for SGNN++ to further reduce the model size while preserving the same quality. Finally, we show fast inference on mobile phones.</abstract>
<identifier type="citekey">ravi-kozareva-2019-device</identifier>
<identifier type="doi">10.18653/v1/P19-1368</identifier>
<location>
<url>https://aclanthology.org/P19-1368/</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>3784</start>
<end>3793</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On-device Structured and Context Partitioned Projection Networks
%A Ravi, Sujith
%A Kozareva, Zornitsa
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F ravi-kozareva-2019-device
%X A challenging problem in on-device text classification is to build highly accurate neural models that can fit in small memory footprint and have low latency. To address this challenge, we propose an on-device neural network SGNN++ which dynamically learns compact projection vectors from raw text using structured and context-dependent partition projections. We show that this results in accelerated inference and performance improvements. We conduct extensive evaluation on multiple conversational tasks and languages such as English, Japanese, Spanish and French. Our SGNN++ model significantly outperforms all baselines, improves upon existing on-device neural models and even surpasses RNN, CNN and BiLSTM models on dialog act and intent prediction. Through a series of ablation studies we show the impact of the partitioned projections and structured information leading to 10% improvement. We study the impact of the model size on accuracy and introduce quantization-aware training for SGNN++ to further reduce the model size while preserving the same quality. Finally, we show fast inference on mobile phones.
%R 10.18653/v1/P19-1368
%U https://aclanthology.org/P19-1368/
%U https://doi.org/10.18653/v1/P19-1368
%P 3784-3793
Markdown (Informal)
[On-device Structured and Context Partitioned Projection Networks](https://aclanthology.org/P19-1368/) (Ravi & Kozareva, ACL 2019)
ACL