@article{wang-eisner-2016-galactic,
title = "The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages",
author = "Wang, Dingquan and
Eisner, Jason",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina",
journal = "Transactions of the Association for Computational Linguistics",
volume = "4",
year = "2016",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q16-1035/",
doi = "10.1162/tacl_a_00113",
pages = "491--505",
abstract = "We release Galactic Dependencies 1.0{---}a large set of synthetic languages not found on Earth, but annotated in Universal Dependencies format. This new resource aims to provide training and development data for NLP methods that aim to adapt to unfamiliar languages. Each synthetic treebank is produced from a real treebank by stochastically permuting the dependents of nouns and/or verbs to match the word order of other real languages. We discuss the usefulness, realism, parsability, perplexity, and diversity of the synthetic languages. As a simple demonstration of the use of Galactic Dependencies, we consider single-source transfer, which attempts to parse a real target language using a parser trained on a {\textquotedblleft}nearby{\textquotedblright} source language. We find that including synthetic source languages somewhat increases the diversity of the source pool, which significantly improves results for most target languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-eisner-2016-galactic">
<titleInfo>
<title>The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dingquan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Eisner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We release Galactic Dependencies 1.0—a large set of synthetic languages not found on Earth, but annotated in Universal Dependencies format. This new resource aims to provide training and development data for NLP methods that aim to adapt to unfamiliar languages. Each synthetic treebank is produced from a real treebank by stochastically permuting the dependents of nouns and/or verbs to match the word order of other real languages. We discuss the usefulness, realism, parsability, perplexity, and diversity of the synthetic languages. As a simple demonstration of the use of Galactic Dependencies, we consider single-source transfer, which attempts to parse a real target language using a parser trained on a “nearby” source language. We find that including synthetic source languages somewhat increases the diversity of the source pool, which significantly improves results for most target languages.</abstract>
<identifier type="citekey">wang-eisner-2016-galactic</identifier>
<identifier type="doi">10.1162/tacl_a_00113</identifier>
<location>
<url>https://aclanthology.org/Q16-1035/</url>
</location>
<part>
<date>2016</date>
<detail type="volume"><number>4</number></detail>
<extent unit="page">
<start>491</start>
<end>505</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages
%A Wang, Dingquan
%A Eisner, Jason
%J Transactions of the Association for Computational Linguistics
%D 2016
%V 4
%I MIT Press
%C Cambridge, MA
%F wang-eisner-2016-galactic
%X We release Galactic Dependencies 1.0—a large set of synthetic languages not found on Earth, but annotated in Universal Dependencies format. This new resource aims to provide training and development data for NLP methods that aim to adapt to unfamiliar languages. Each synthetic treebank is produced from a real treebank by stochastically permuting the dependents of nouns and/or verbs to match the word order of other real languages. We discuss the usefulness, realism, parsability, perplexity, and diversity of the synthetic languages. As a simple demonstration of the use of Galactic Dependencies, we consider single-source transfer, which attempts to parse a real target language using a parser trained on a “nearby” source language. We find that including synthetic source languages somewhat increases the diversity of the source pool, which significantly improves results for most target languages.
%R 10.1162/tacl_a_00113
%U https://aclanthology.org/Q16-1035/
%U https://doi.org/10.1162/tacl_a_00113
%P 491-505
Markdown (Informal)
[The Galactic Dependencies Treebanks: Getting More Data by Synthesizing New Languages](https://aclanthology.org/Q16-1035/) (Wang & Eisner, TACL 2016)
ACL