% Journal article record (TACL volume 9, 2021, pages 1180--1196).
@article{begus-2021-identity,
  author    = {Begu{\v{s}}, Ga{\v{s}}per},
  title     = {Identity-Based Patterns in Deep Convolutional Networks: Generative Adversarial Phonology and Reduplication},
  editor    = {Roark, Brian and Nenkova, Ani},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {9},
  pages     = {1180--1196},
  year      = {2021},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  doi       = {10.1162/tacl_a_00421},
  url       = {https://aclanthology.org/2021.tacl-1.70},
  abstract  = {This paper models unsupervised learning of an identity-based pattern (or copying) in speech called reduplication from raw continuous data with deep convolutional neural networks. We use the ciwGAN architecture (Begu{\v{s}}, 2021a) in which learning of meaningful representations in speech emerges from a requirement that the CNNs generate informative data. We propose a technique to wug-test CNNs trained on speech and, based on four generative tests, argue that the network learns to represent an identity-based pattern in its latent space. By manipulating only two categorical variables in the latent space, we can actively turn an unreduplicated form into a reduplicated form with no other substantial changes to the output in the majority of cases. We also argue that the network extends the identity-based pattern to unobserved data. Exploration of how meaningful representations of identity-based patterns emerge in CNNs and how the latent space variables outside of the training range correlate with identity-based patterns in the output has general implications for neural network interpretability.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "begus-2021-identity". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="begus-2021-identity">
    <!-- Article-level description -->
    <titleInfo>
      <title>Identity-Based Patterns in Deep Convolutional Networks: Generative Adversarial Phonology and Reduplication</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Gašper</namePart>
      <namePart type="family">Beguš</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <!-- Host publication (the journal the article appears in) -->
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>This paper models unsupervised learning of an identity-based pattern (or copying) in speech called reduplication from raw continuous data with deep convolutional neural networks. We use the ciwGAN architecture (Beguš, 2021a) in which learning of meaningful representations in speech emerges from a requirement that the CNNs generate informative data. We propose a technique to wug-test CNNs trained on speech and, based on four generative tests, argue that the network learns to represent an identity-based pattern in its latent space. By manipulating only two categorical variables in the latent space, we can actively turn an unreduplicated form into a reduplicated form with no other substantial changes to the output in the majority of cases. We also argue that the network extends the identity-based pattern to unobserved data. Exploration of how meaningful representations of identity-based patterns emerge in CNNs and how the latent space variables outside of the training range correlate with identity-based patterns in the output has general implications for neural network interpretability.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">begus-2021-identity</identifier>
    <identifier type="doi">10.1162/tacl_a_00421</identifier>
    <location>
      <url>https://aclanthology.org/2021.tacl-1.70</url>
    </location>
    <!-- Position within the host: volume and page extent -->
    <part>
      <date>2021</date>
      <detail type="volume"><number>9</number></detail>
      <extent unit="page">
        <start>1180</start>
        <end>1196</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Identity-Based Patterns in Deep Convolutional Networks: Generative Adversarial Phonology and Reduplication
%A Beguš, Gašper
%J Transactions of the Association for Computational Linguistics
%D 2021
%V 9
%I MIT Press
%C Cambridge, MA
%F begus-2021-identity
%X This paper models unsupervised learning of an identity-based pattern (or copying) in speech called reduplication from raw continuous data with deep convolutional neural networks. We use the ciwGAN architecture (Beguš, 2021a) in which learning of meaningful representations in speech emerges from a requirement that the CNNs generate informative data. We propose a technique to wug-test CNNs trained on speech and, based on four generative tests, argue that the network learns to represent an identity-based pattern in its latent space. By manipulating only two categorical variables in the latent space, we can actively turn an unreduplicated form into a reduplicated form with no other substantial changes to the output in the majority of cases. We also argue that the network extends the identity-based pattern to unobserved data. Exploration of how meaningful representations of identity-based patterns emerge in CNNs and how the latent space variables outside of the training range correlate with identity-based patterns in the output has general implications for neural network interpretability.
%R 10.1162/tacl_a_00421
%U https://aclanthology.org/2021.tacl-1.70
%U https://doi.org/10.1162/tacl_a_00421
%P 1180-1196
Markdown (Informal)
[Identity-Based Patterns in Deep Convolutional Networks: Generative Adversarial Phonology and Reduplication](https://aclanthology.org/2021.tacl-1.70) (Beguš, TACL 2021)
ACL