@article{murawaki-2019-bayesian,
title = "{B}ayesian Learning of Latent Representations of Language Structures",
author = "Murawaki, Yugo",
journal = "Computational Linguistics",
volume = "45",
number = "2",
month = jun,
year = "2019",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J19-2001/",
doi = "10.1162/coli_a_00346",
pages = "199--228",
abstract = "We borrow the concept of representation learning from deep learning research, and we argue that the quest for Greenbergian implicational universals can be reformulated as the learning of good latent representations of languages, or sequences of surface typological features. By projecting languages into latent representations and performing inference in the latent space, we can handle complex dependencies among features in an implicit manner. The most challenging problem in turning the idea into a concrete computational model is the alarmingly large number of missing values in existing typological databases. To address this problem, we keep the number of model parameters relatively small to avoid overfitting, adopt the Bayesian learning framework for its robustness, and exploit phylogenetically and/or spatially related languages as additional clues. Experiments show that the proposed model recovers missing values more accurately than others and that some latent variables exhibit phylogenetic and spatial signals comparable to those of surface features."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="murawaki-2019-bayesian">
<titleInfo>
<title>Bayesian Learning of Latent Representations of Language Structures</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yugo</namePart>
<namePart type="family">Murawaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We borrow the concept of representation learning from deep learning research, and we argue that the quest for Greenbergian implicational universals can be reformulated as the learning of good latent representations of languages, or sequences of surface typological features. By projecting languages into latent representations and performing inference in the latent space, we can handle complex dependencies among features in an implicit manner. The most challenging problem in turning the idea into a concrete computational model is the alarmingly large number of missing values in existing typological databases. To address this problem, we keep the number of model parameters relatively small to avoid overfitting, adopt the Bayesian learning framework for its robustness, and exploit phylogenetically and/or spatially related languages as additional clues. Experiments show that the proposed model recovers missing values more accurately than others and that some latent variables exhibit phylogenetic and spatial signals comparable to those of surface features.</abstract>
<identifier type="citekey">murawaki-2019-bayesian</identifier>
<identifier type="doi">10.1162/coli_a_00346</identifier>
<location>
<url>https://aclanthology.org/J19-2001/</url>
</location>
<part>
<date>2019-06</date>
<detail type="volume"><number>45</number></detail>
<detail type="issue"><number>2</number></detail>
<extent unit="page">
<start>199</start>
<end>228</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Bayesian Learning of Latent Representations of Language Structures
%A Murawaki, Yugo
%J Computational Linguistics
%D 2019
%8 June
%V 45
%N 2
%I MIT Press
%C Cambridge, MA
%F murawaki-2019-bayesian
%X We borrow the concept of representation learning from deep learning research, and we argue that the quest for Greenbergian implicational universals can be reformulated as the learning of good latent representations of languages, or sequences of surface typological features. By projecting languages into latent representations and performing inference in the latent space, we can handle complex dependencies among features in an implicit manner. The most challenging problem in turning the idea into a concrete computational model is the alarmingly large number of missing values in existing typological databases. To address this problem, we keep the number of model parameters relatively small to avoid overfitting, adopt the Bayesian learning framework for its robustness, and exploit phylogenetically and/or spatially related languages as additional clues. Experiments show that the proposed model recovers missing values more accurately than others and that some latent variables exhibit phylogenetic and spatial signals comparable to those of surface features.
%R 10.1162/coli_a_00346
%U https://aclanthology.org/J19-2001/
%U https://doi.org/10.1162/coli_a_00346
%P 199-228
Markdown (Informal)
[Bayesian Learning of Latent Representations of Language Structures](https://aclanthology.org/J19-2001/) (Murawaki, CL 2019)
ACL