% Workshop paper, ACL Anthology W18-1004 (pages 24--27 of the proceedings named in booktitle).
% The abstract uses a plain apostrophe for possessives; a backtick would typeset as an opening quote.
@inproceedings{weber-etal-2018-fine,
  title     = {The Fine Line between Linguistic Generalization and Failure in {Seq2Seq}-Attention Models},
  author    = {Weber, Noah and
               Shekhar, Leena and
               Balasubramanian, Niranjan},
  editor    = {Bisk, Yonatan and
               Levy, Omer and
               Yatskar, Mark},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-1004/},
  doi       = {10.18653/v1/W18-1004},
  pages     = {24--27},
  abstract  = {Seq2Seq based neural architectures have become the go-to architecture to apply to sequence to sequence language tasks. Despite their excellent performance on these tasks, recent work has noted that these models typically do not fully capture the linguistic structure required to generalize beyond the dense sections of the data distribution (Ettinger et al., 2017), and as such, are likely to fail on examples from the tail end of the distribution (such as inputs that are noisy (Belinkov and Bisk, 2018), or of different length (Bentivogli et al., 2016)). In this paper we look at a model's ability to generalize on a simple symbol rewriting task with a clearly defined structure. We find that the model's ability to generalize this structure beyond the training distribution depends greatly on the chosen random seed, even when performance on the test set remains the same. This finding suggests that model's ability to capture generalizable structure is highly sensitive, and more so, this sensitivity may not be apparent when evaluating the model on standard test sets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="weber-etal-2018-fine">
<titleInfo>
<title>The Fine Line between Linguistic Generalization and Failure in Seq2Seq-Attention Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Noah</namePart>
<namePart type="family">Weber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leena</namePart>
<namePart type="family">Shekhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niranjan</namePart>
<namePart type="family">Balasubramanian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Generalization in the Age of Deep Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Bisk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omer</namePart>
<namePart type="family">Levy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Yatskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Seq2Seq based neural architectures have become the go-to architecture to apply to sequence to sequence language tasks. Despite their excellent performance on these tasks, recent work has noted that these models typically do not fully capture the linguistic structure required to generalize beyond the dense sections of the data distribution (Ettinger et al., 2017), and as such, are likely to fail on examples from the tail end of the distribution (such as inputs that are noisy (Belinkov and Bisk, 2018), or of different length (Bentivogli et al., 2016)). In this paper we look at a model's ability to generalize on a simple symbol rewriting task with a clearly defined structure. We find that the model's ability to generalize this structure beyond the training distribution depends greatly on the chosen random seed, even when performance on the test set remains the same. This finding suggests that model's ability to capture generalizable structure is highly sensitive, and more so, this sensitivity may not be apparent when evaluating the model on standard test sets.</abstract>
<identifier type="citekey">weber-etal-2018-fine</identifier>
<identifier type="doi">10.18653/v1/W18-1004</identifier>
<location>
<url>https://aclanthology.org/W18-1004/</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>24</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Fine Line between Linguistic Generalization and Failure in Seq2Seq-Attention Models
%A Weber, Noah
%A Shekhar, Leena
%A Balasubramanian, Niranjan
%Y Bisk, Yonatan
%Y Levy, Omer
%Y Yatskar, Mark
%S Proceedings of the Workshop on Generalization in the Age of Deep Learning
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F weber-etal-2018-fine
%X Seq2Seq based neural architectures have become the go-to architecture to apply to sequence to sequence language tasks. Despite their excellent performance on these tasks, recent work has noted that these models typically do not fully capture the linguistic structure required to generalize beyond the dense sections of the data distribution (Ettinger et al., 2017), and as such, are likely to fail on examples from the tail end of the distribution (such as inputs that are noisy (Belinkov and Bisk, 2018), or of different length (Bentivogli et al., 2016)). In this paper we look at a model's ability to generalize on a simple symbol rewriting task with a clearly defined structure. We find that the model's ability to generalize this structure beyond the training distribution depends greatly on the chosen random seed, even when performance on the test set remains the same. This finding suggests that model's ability to capture generalizable structure is highly sensitive, and more so, this sensitivity may not be apparent when evaluating the model on standard test sets.
%R 10.18653/v1/W18-1004
%U https://aclanthology.org/W18-1004/
%U https://doi.org/10.18653/v1/W18-1004
%P 24-27
Markdown (Informal)
[The Fine Line between Linguistic Generalization and Failure in Seq2Seq-Attention Models](https://aclanthology.org/W18-1004/) (Weber et al., Gen-Deep 2018)
ACL