@article{bjerva-etal-2019-language,
title = "What Do Language Representations Really Represent?",
author = {Bjerva, Johannes and
{\"O}stling, Robert and
Veiga, Maria Han and
Tiedemann, J{\"o}rg and
Augenstein, Isabelle},
journal = "Computational Linguistics",
volume = "45",
number = "2",
month = jun,
year = "2019",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/J19-2006",
doi = "10.1162/coli_a_00351",
pages = "381--389",
abstract = "A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships{---}a convenient benchmark used for evaluation in previous work{---}appears to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bjerva-etal-2019-language">
<titleInfo>
<title>What Do Language Representations Really Represent?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Bjerva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Östling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Veiga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships—a convenient benchmark used for evaluation in previous work—appear to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another.</abstract>
<identifier type="citekey">bjerva-etal-2019-language</identifier>
<identifier type="doi">10.1162/coli_a_00351</identifier>
<location>
<url>https://aclanthology.org/J19-2006</url>
</location>
<part>
<date>2019-06</date>
<detail type="volume"><number>45</number></detail>
<detail type="issue"><number>2</number></detail>
<extent unit="page">
<start>381</start>
<end>389</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T What Do Language Representations Really Represent?
%A Bjerva, Johannes
%A Östling, Robert
%A Veiga, Maria Han
%A Tiedemann, Jörg
%A Augenstein, Isabelle
%J Computational Linguistics
%D 2019
%8 June
%V 45
%N 2
%I MIT Press
%C Cambridge, MA
%F bjerva-etal-2019-language
%X A neural language model trained on a text corpus can be used to induce distributed representations of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations. We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships—a convenient benchmark used for evaluation in previous work—appear to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another.
%R 10.1162/coli_a_00351
%U https://aclanthology.org/J19-2006
%U https://doi.org/10.1162/coli_a_00351
%P 381-389
Markdown (Informal)
[What Do Language Representations Really Represent?](https://aclanthology.org/J19-2006) (Bjerva et al., CL 2019)
ACL
Johannes Bjerva, Robert Östling, Maria Han Veiga, Jörg Tiedemann, and Isabelle Augenstein. 2019. What Do Language Representations Really Represent?. Computational Linguistics, 45(2):381–389.
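
As a rough illustration of the analysis the abstract describes, the sketch below (not the authors' code; every vector and feature value is invented toy data) correlates pairwise distances between hypothetical learned language embeddings with pairwise distances over hypothetical structural (typological) features, using a plain Spearman rank correlation as a stand-in for the paper's more careful correlational and causal analysis.

```python
# Minimal sketch, assuming toy data: compare similarity structure of learned
# language representations against similarity structure of typological features.
import numpy as np
from scipy.spatial.distance import pdist
from scipy.stats import spearmanr

# Hypothetical language embeddings (e.g., induced by a multilingual neural
# language model), one row per language. Values are made up for illustration.
language_embeddings = np.array([
    [0.12, -0.40, 0.33],   # language A
    [0.10, -0.38, 0.35],   # language B
    [-0.50, 0.22, 0.01],   # language C
    [-0.48, 0.25, 0.05],   # language D
])

# Hypothetical binary structural features for the same languages
# (e.g., WALS-style word-order or morphology features), also made up.
structural_features = np.array([
    [1, 0, 1, 0],
    [1, 0, 1, 0],
    [0, 1, 0, 1],
    [0, 1, 1, 1],
])

# Condensed pairwise distance vectors over the same set of languages.
embedding_dist = pdist(language_embeddings, metric="cosine")
structural_dist = pdist(structural_features, metric="hamming")

# Rank correlation between the two distance structures; the paper additionally
# considers genetic and geographical distances and goes beyond raw correlation.
rho, p_value = spearmanr(embedding_dist, structural_dist)
print(f"Spearman correlation between distance structures: {rho:.3f} (p={p_value:.3f})")
```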