BibTeX
@inproceedings{suzgun-etal-2019-lstm,
    title = "{LSTM} Networks Can Perform Dynamic Counting",
    author = "Suzgun, Mirac and
      Belinkov, Yonatan and
      Shieber, Stuart and
      Gehrmann, Sebastian",
    editor = "Eisner, Jason and
      Gall{\'e}, Matthias and
      Heinz, Jeffrey and
      Quattoni, Ariadna and
      Rabusseau, Guillaume",
    booktitle = "Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges",
    month = aug,
    year = "2019",
    address = "Florence",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-3905/",
    doi = "10.18653/v1/W19-3905",
    pages = "44--54",
    abstract = "In this paper, we systematically assess the ability of standard recurrent networks to perform dynamic counting and to encode hierarchical representations. All the neural models in our experiments are designed to be small-sized networks both to prevent them from memorizing the training sets and to visualize and interpret their behaviour at test time. Our results demonstrate that the Long Short-Term Memory (LSTM) networks can learn to recognize the well-balanced parenthesis language (Dyck-1) and the shuffles of multiple Dyck-1 languages, each defined over different parenthesis-pairs, by emulating simple real-time k-counter machines. To the best of our knowledge, this work is the first study to introduce the shuffle languages to analyze the computational power of neural networks. We also show that a single-layer LSTM with only one hidden unit is practically sufficient for recognizing the Dyck-1 language. However, none of our recurrent networks was able to yield a good performance on the Dyck-2 language learning task, which requires a model to have a stack-like mechanism for recognition."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="suzgun-etal-2019-lstm">
    <titleInfo>
      <title>LSTM Networks Can Perform Dynamic Counting</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Mirac</namePart>
      <namePart type="family">Suzgun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yonatan</namePart>
      <namePart type="family">Belinkov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stuart</namePart>
      <namePart type="family">Shieber</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sebastian</namePart>
      <namePart type="family">Gehrmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jason</namePart>
        <namePart type="family">Eisner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthias</namePart>
        <namePart type="family">Gallé</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jeffrey</namePart>
        <namePart type="family">Heinz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ariadna</namePart>
        <namePart type="family">Quattoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Guillaume</namePart>
        <namePart type="family">Rabusseau</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we systematically assess the ability of standard recurrent networks to perform dynamic counting and to encode hierarchical representations. All the neural models in our experiments are designed to be small-sized networks both to prevent them from memorizing the training sets and to visualize and interpret their behaviour at test time. Our results demonstrate that the Long Short-Term Memory (LSTM) networks can learn to recognize the well-balanced parenthesis language (Dyck-1) and the shuffles of multiple Dyck-1 languages, each defined over different parenthesis-pairs, by emulating simple real-time k-counter machines. To the best of our knowledge, this work is the first study to introduce the shuffle languages to analyze the computational power of neural networks. We also show that a single-layer LSTM with only one hidden unit is practically sufficient for recognizing the Dyck-1 language. However, none of our recurrent networks was able to yield a good performance on the Dyck-2 language learning task, which requires a model to have a stack-like mechanism for recognition.</abstract>
    <identifier type="citekey">suzgun-etal-2019-lstm</identifier>
    <identifier type="doi">10.18653/v1/W19-3905</identifier>
    <location>
      <url>https://aclanthology.org/W19-3905/</url>
    </location>
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>44</start>
        <end>54</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T LSTM Networks Can Perform Dynamic Counting
%A Suzgun, Mirac
%A Belinkov, Yonatan
%A Shieber, Stuart
%A Gehrmann, Sebastian
%Y Eisner, Jason
%Y Gallé, Matthias
%Y Heinz, Jeffrey
%Y Quattoni, Ariadna
%Y Rabusseau, Guillaume
%S Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence
%F suzgun-etal-2019-lstm
%X In this paper, we systematically assess the ability of standard recurrent networks to perform dynamic counting and to encode hierarchical representations. All the neural models in our experiments are designed to be small-sized networks both to prevent them from memorizing the training sets and to visualize and interpret their behaviour at test time. Our results demonstrate that the Long Short-Term Memory (LSTM) networks can learn to recognize the well-balanced parenthesis language (Dyck-1) and the shuffles of multiple Dyck-1 languages, each defined over different parenthesis-pairs, by emulating simple real-time k-counter machines. To the best of our knowledge, this work is the first study to introduce the shuffle languages to analyze the computational power of neural networks. We also show that a single-layer LSTM with only one hidden unit is practically sufficient for recognizing the Dyck-1 language. However, none of our recurrent networks was able to yield a good performance on the Dyck-2 language learning task, which requires a model to have a stack-like mechanism for recognition.
%R 10.18653/v1/W19-3905
%U https://aclanthology.org/W19-3905/
%U https://doi.org/10.18653/v1/W19-3905
%P 44-54
Markdown (Informal)
[LSTM Networks Can Perform Dynamic Counting](https://aclanthology.org/W19-3905/) (Suzgun et al., ACL 2019)
ACL
Mirac Suzgun, Yonatan Belinkov, Stuart Shieber, and Sebastian Gehrmann. 2019. LSTM Networks Can Perform Dynamic Counting. In Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges, pages 44–54, Florence. Association for Computational Linguistics.
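
The abstract's key claim is mechanistic: LSTMs recognize Dyck-1, and shuffles of several Dyck-1 languages over distinct parenthesis pairs, by emulating real-time k-counter machines. The following is a minimal sketch of that target mechanism, not the authors' code; the function names and the example pair alphabet are our own illustration.

# Illustrative sketch of the real-time k-counter recognizers the paper
# says LSTMs learn to emulate. Not the authors' implementation.

def is_dyck1(s: str) -> bool:
    """Recognize Dyck-1 (balanced parentheses) with a single counter."""
    count = 0
    for ch in s:
        if ch == "(":
            count += 1
        elif ch == ")":
            count -= 1
            if count < 0:          # closing parenthesis with no open match
                return False
        else:
            return False           # alphabet is {'(', ')'}
    return count == 0              # accept iff every parenthesis is matched

def is_shuffle_dyck1(s: str, pairs=("()", "[]")) -> bool:
    """Recognize the shuffle of one Dyck-1 language per pair: each pair's
    brackets must balance independently, so k pairs need k counters."""
    counts = {p: 0 for p in pairs}
    open_of = {p[0]: p for p in pairs}   # opening symbol -> its pair
    close_of = {p[1]: p for p in pairs}  # closing symbol -> its pair
    for ch in s:
        if ch in open_of:
            counts[open_of[ch]] += 1
        elif ch in close_of:
            counts[close_of[ch]] -= 1
            if counts[close_of[ch]] < 0:
                return False
        else:
            return False
    return all(c == 0 for c in counts.values())

assert is_dyck1("(()())") and not is_dyck1("())(")
assert is_shuffle_dyck1("([)]")      # in the shuffle, though not in Dyck-2
assert not is_shuffle_dyck1("([)]]")

The shuffle example also shows why the paper's negative Dyck-2 result is plausible: "([)]" is accepted by independent counters but is not in Dyck-2, whose properly nested matching requires a stack rather than counters.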