@article{kadar-etal-2017-representation,
title = "Representation of Linguistic Form and Function in Recurrent Neural Networks",
author = "K{\'a}d{\'a}r, {\'A}kos and
Chrupa{\l}a, Grzegorz and
Alishahi, Afra",
journal = "Computational Linguistics",
volume = "43",
number = "4",
month = dec,
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J17-4003",
doi = "10.1162/COLI_a_00300",
pages = "761--780",
abstract = "We present novel methods for analyzing the activation patterns of recurrent neural networks from a linguistic point of view and explore the types of linguistic structure they learn. As a case study, we use a standard standalone language model, and a multi-task gated recurrent network architecture consisting of two parallel pathways with shared word embeddings: The Visual pathway is trained on predicting the representations of the visual scene corresponding to an input sentence, and the Textual pathway is trained to predict the next word in the same sentence. We propose a method for estimating the amount of contribution of individual tokens in the input to the final prediction of the networks. Using this method, we show that the Visual pathway pays selective attention to lexical categories and grammatical functions that carry semantic information, and learns to treat word types differently depending on their grammatical function and their position in the sequential structure of the sentence. In contrast, the language models are comparatively more sensitive to words with a syntactic function. Further analysis of the most informative n-gram contexts for each model shows that in comparison with the Visual pathway, the language models react more strongly to abstract contexts that represent syntactic constructions.",
}
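The abstract above describes estimating how much each input token contributes to a network's final prediction. As a rough illustration of that idea (not the published method), the snippet below computes omission-style contribution scores against a toy mean-of-embeddings encoder; the vocabulary, embedding size, encoder, and cosine distance are all assumptions for demonstration only.

# Illustrative sketch only: an omission-style probe of per-token contribution,
# in the spirit of the abstract's "contribution of individual tokens" idea.
# The toy mean-of-embeddings encoder and cosine distance are stand-ins,
# not the architecture or scoring used in the paper.
import numpy as np

rng = np.random.default_rng(0)
vocab = {"the": 0, "dog": 1, "chases": 2, "a": 3, "ball": 4}
emb = rng.normal(size=(len(vocab), 16))  # toy word embeddings

def encode(tokens):
    """Toy sentence encoder: mean of word embeddings (stand-in for an RNN)."""
    return np.mean([emb[vocab[t]] for t in tokens], axis=0)

def cosine_distance(u, v):
    return 1.0 - np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

def omission_scores(tokens):
    """Score each token by how much the sentence representation changes
    when that token is left out of the input."""
    full = encode(tokens)
    scores = {}
    for i, tok in enumerate(tokens):
        reduced = tokens[:i] + tokens[i + 1:]
        scores[tok] = cosine_distance(full, encode(reduced)) if reduced else 1.0
    return scores

print(omission_scores(["the", "dog", "chases", "a", "ball"]))

Tokens whose removal moves the representation the most receive the highest scores; with a trained visual or textual pathway in place of the toy encoder, such scores could be compared across lexical categories as the abstract describes.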