% Conference paper (COLING 2016). Whole words are braced in the title so that
% sentence-casing styles keep "L1", "Elias" and "Fano" capitalised.
@inproceedings{senuma-aizawa-2016-learning,
  title     = {Learning Succinct Models: Pipelined Compression with {L1}-Regularization, Hashing, {Elias}-{Fano} Indices, and Quantization},
  author    = {Senuma, Hajime and Aizawa, Akiko},
  editor    = {Matsumoto, Yuji and Prasad, Rashmi},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/C16-1261/},
  pages     = {2774--2784},
  abstract  = {The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are $m$ features in total but only $n = o(\sqrt{m})$ features are required to distinguish examples, with $\Omega(\log m)$ training examples and reasonable settings, it is possible to obtain a good model in a \textit{succinct} representation using $n \log_2 \frac{m}{n} + o(m)$ bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias{--}Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 \textit{kilo}bytes without notable loss of accuracy.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="senuma-aizawa-2016-learning">
<titleInfo>
<title>Learning Succinct Models: Pipelined Compression with L1-Regularization, Hashing, Elias-Fano Indices, and Quantization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hajime</namePart>
<namePart type="family">Senuma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akiko</namePart>
<namePart type="family">Aizawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Prasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are m features in total but only n = o(√m) features are required to distinguish examples, with Ω(log m) training examples and reasonable settings, it is possible to obtain a good model in a succinct representation using n log₂(m/n) + o(m) bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias–Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 kilobytes without notable loss of accuracy.</abstract>
<identifier type="citekey">senuma-aizawa-2016-learning</identifier>
<location>
<url>https://aclanthology.org/C16-1261/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>2774</start>
<end>2784</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Succinct Models: Pipelined Compression with L1-Regularization, Hashing, Elias-Fano Indices, and Quantization
%A Senuma, Hajime
%A Aizawa, Akiko
%Y Matsumoto, Yuji
%Y Prasad, Rashmi
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F senuma-aizawa-2016-learning
%X The recent proliferation of smart devices necessitates methods to learn small-sized models. This paper demonstrates that if there are m features in total but only n = o(√m) features are required to distinguish examples, with Ω(log m) training examples and reasonable settings, it is possible to obtain a good model in a succinct representation using n log₂(m/n) + o(m) bits, by using a pipeline of existing compression methods: L1-regularized logistic regression, feature hashing, Elias–Fano indices, and randomized quantization. An experiment shows that a noun phrase chunking task for which an existing library requires 27 megabytes can be compressed to less than 13 kilobytes without notable loss of accuracy.
%U https://aclanthology.org/C16-1261/
%P 2774-2784
Markdown (Informal)
[Learning Succinct Models: Pipelined Compression with L1-Regularization, Hashing, Elias-Fano Indices, and Quantization](https://aclanthology.org/C16-1261/) (Senuma & Aizawa, COLING 2016)
ACL