@inproceedings{miceli-barone-2018-low,
title = "Low-rank passthrough neural networks",
author = "Miceli Barone, Antonio Valerio",
editor = "Haffari, Reza and
Cherry, Colin and
Foster, George and
Khadivi, Shahram and
Salehi, Bahar",
booktitle = "Proceedings of the Workshop on Deep Learning Approaches for Low-Resource {NLP}",
month = jul,
year = "2018",
address = "Melbourne",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3410",
doi = "10.18653/v1/W18-3410",
pages = "77--86",
abstract = "Various common deep learning architectures, such as LSTMs, GRUs, Resnets and Highway Networks, employ state passthrough connections that support training with high feed-forward depth or recurrence over many time steps. These {``}Passthrough Networks{''} architectures also enable the decoupling of the network state size from the number of parameters of the network, a possibility has been studied by Sak et al. (2014) with their low-rank parametrization of the LSTM. In this work we extend this line of research, proposing effective, low-rank and low-rank plus diagonal matrix parametrizations for Passthrough Networks which exploit this decoupling property, reducing the data complexity and memory requirements of the network while preserving its memory capacity. This is particularly beneficial in low-resource settings as it supports expressive models with a compact parametrization less susceptible to overfitting. We present competitive experimental results on several tasks, including language modeling and a near state of the art result on sequential randomly-permuted MNIST classification, a hard task on natural data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="miceli-barone-2018-low">
    <titleInfo>
      <title>Low-rank passthrough neural networks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Antonio</namePart>
      <namePart type="given">Valerio</namePart>
      <namePart type="family">Miceli Barone</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Reza</namePart>
        <namePart type="family">Haffari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Colin</namePart>
        <namePart type="family">Cherry</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">George</namePart>
        <namePart type="family">Foster</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shahram</namePart>
        <namePart type="family">Khadivi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bahar</namePart>
        <namePart type="family">Salehi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Melbourne</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Various common deep learning architectures, such as LSTMs, GRUs, ResNets and Highway Networks, employ state passthrough connections that support training with high feed-forward depth or recurrence over many time steps. These “Passthrough Network” architectures also enable the decoupling of the network state size from the number of parameters of the network, a possibility that has been studied by Sak et al. (2014) with their low-rank parametrization of the LSTM. In this work we extend this line of research, proposing effective low-rank and low-rank plus diagonal matrix parametrizations for Passthrough Networks which exploit this decoupling property, reducing the data complexity and memory requirements of the network while preserving its memory capacity. This is particularly beneficial in low-resource settings, as it supports expressive models with a compact parametrization that is less susceptible to overfitting. We present competitive experimental results on several tasks, including language modeling and a near-state-of-the-art result on sequential randomly permuted MNIST classification, a hard task on natural data.</abstract>
<identifier type="citekey">miceli-barone-2018-low</identifier>
<identifier type="doi">10.18653/v1/W18-3410</identifier>
<location>
<url>https://aclanthology.org/W18-3410</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>77</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Low-rank passthrough neural networks
%A Miceli Barone, Antonio Valerio
%Y Haffari, Reza
%Y Cherry, Colin
%Y Foster, George
%Y Khadivi, Shahram
%Y Salehi, Bahar
%S Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne
%F miceli-barone-2018-low
%X Various common deep learning architectures, such as LSTMs, GRUs, ResNets and Highway Networks, employ state passthrough connections that support training with high feed-forward depth or recurrence over many time steps. These “Passthrough Network” architectures also enable the decoupling of the network state size from the number of parameters of the network, a possibility that has been studied by Sak et al. (2014) with their low-rank parametrization of the LSTM. In this work we extend this line of research, proposing effective low-rank and low-rank plus diagonal matrix parametrizations for Passthrough Networks which exploit this decoupling property, reducing the data complexity and memory requirements of the network while preserving its memory capacity. This is particularly beneficial in low-resource settings, as it supports expressive models with a compact parametrization that is less susceptible to overfitting. We present competitive experimental results on several tasks, including language modeling and a near-state-of-the-art result on sequential randomly permuted MNIST classification, a hard task on natural data.
%R 10.18653/v1/W18-3410
%U https://aclanthology.org/W18-3410
%U https://doi.org/10.18653/v1/W18-3410
%P 77-86
Markdown (Informal)
[Low-rank passthrough neural networks](https://aclanthology.org/W18-3410) (Miceli Barone, ACL 2018)

ACL
Antonio Valerio Miceli Barone. 2018. Low-rank passthrough neural networks. In Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP, pages 77–86, Melbourne. Association for Computational Linguistics.
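
For readers who want a concrete picture of the parametrization the abstract describes, here is a minimal sketch of the low-rank plus diagonal idea: a d × d transition matrix is replaced by W = UV + diag(s), cutting the parameter count from d² to 2dr + d while keeping the state size d. This is an illustrative PyTorch-style rendering under our own assumptions, not the author's implementation; the class name, initialization scale, and how it would be wired into a recurrent cell are all hypothetical.

```python
import torch
import torch.nn as nn


class LowRankPlusDiagonalLinear(nn.Module):
    """Sketch of a low-rank plus diagonal map x -> x @ (U V + diag(s)).

    Hypothetical illustration of the abstract's idea: 2*d*rank + d
    parameters stand in for the d*d matrix of a full linear map, so
    the state size d is decoupled from the parameter count.
    """

    def __init__(self, d: int, rank: int):
        super().__init__()
        self.U = nn.Parameter(torch.randn(d, rank) * 0.01)  # d x r factor
        self.V = nn.Parameter(torch.randn(rank, d) * 0.01)  # r x d factor
        self.s = nn.Parameter(torch.ones(d))                # diagonal term

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Apply U V without materializing the full d x d product:
        # project down to rank r, back up to d, then add the
        # elementwise (diagonal) contribution.
        return (x @ self.U) @ self.V + x * self.s
```

Dropping `self.s` recovers the plain low-rank variant; the diagonal term adds only d parameters but lets the map act on each state component individually, e.g. passing state through near-unchanged. In a passthrough architecture such as an LSTM or GRU, a module like this would stand in for a gate's full weight matrix.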