@inproceedings{ding-koehn-2019-parallelizable,
title = "Parallelizable Stack Long Short-Term Memory",
author = "Ding, Shuoyang and
Koehn, Philipp",
editor = "Martins, Andre and
Vlachos, Andreas and
Kozareva, Zornitsa and
Ravi, Sujith and
Lampouras, Gerasimos and
Niculae, Vlad and
Kreutzer, Julia",
booktitle = "Proceedings of the Third Workshop on Structured Prediction for {NLP}",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-1501/",
doi = "10.18653/v1/W19-1501",
pages = "1--6",
abstract = "Stack Long Short-Term Memory (StackLSTM) is useful for various applications such as parsing and string-to-tree neural machine translation, but it is also known to be notoriously difficult to parallelize for GPU training due to the fact that the computations are dependent on discrete operations. In this paper, we tackle this problem by utilizing state access patterns of StackLSTM to homogenize computations with regard to different discrete operations. Our parsing experiments show that the method scales up almost linearly with increasing batch size, and our parallelized PyTorch implementation trains significantly faster compared to the Dynet C++ implementation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ding-koehn-2019-parallelizable">
<titleInfo>
<title>Parallelizable Stack Long Short-Term Memory</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuoyang</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Structured Prediction for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Lampouras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vlad</namePart>
<namePart type="family">Niculae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Kreutzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Stack Long Short-Term Memory (StackLSTM) is useful for various applications such as parsing and string-to-tree neural machine translation, but it is also known to be notoriously difficult to parallelize for GPU training due to the fact that the computations are dependent on discrete operations. In this paper, we tackle this problem by utilizing state access patterns of StackLSTM to homogenize computations with regard to different discrete operations. Our parsing experiments show that the method scales up almost linearly with increasing batch size, and our parallelized PyTorch implementation trains significantly faster compared to the Dynet C++ implementation.</abstract>
<identifier type="citekey">ding-koehn-2019-parallelizable</identifier>
<identifier type="doi">10.18653/v1/W19-1501</identifier>
<location>
<url>https://aclanthology.org/W19-1501/</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>1</start>
<end>6</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Parallelizable Stack Long Short-Term Memory
%A Ding, Shuoyang
%A Koehn, Philipp
%Y Martins, Andre
%Y Vlachos, Andreas
%Y Kozareva, Zornitsa
%Y Ravi, Sujith
%Y Lampouras, Gerasimos
%Y Niculae, Vlad
%Y Kreutzer, Julia
%S Proceedings of the Third Workshop on Structured Prediction for NLP
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F ding-koehn-2019-parallelizable
%X Stack Long Short-Term Memory (StackLSTM) is useful for various applications such as parsing and string-to-tree neural machine translation, but it is also known to be notoriously difficult to parallelize for GPU training due to the fact that the computations are dependent on discrete operations. In this paper, we tackle this problem by utilizing state access patterns of StackLSTM to homogenize computations with regard to different discrete operations. Our parsing experiments show that the method scales up almost linearly with increasing batch size, and our parallelized PyTorch implementation trains significantly faster compared to the Dynet C++ implementation.
%R 10.18653/v1/W19-1501
%U https://aclanthology.org/W19-1501/
%U https://doi.org/10.18653/v1/W19-1501
%P 1-6
Markdown (Informal)
[Parallelizable Stack Long Short-Term Memory](https://aclanthology.org/W19-1501/) (Ding & Koehn, NAACL 2019)
ACL
- Shuoyang Ding and Philipp Koehn. 2019. Parallelizable Stack Long Short-Term Memory. In Proceedings of the Third Workshop on Structured Prediction for NLP, pages 1–6, Minneapolis, Minnesota. Association for Computational Linguistics.