@inproceedings{kolachina-ranta-2019-bootstrapping,
title = "Bootstrapping {UD} treebanks for Delexicalized Parsing",
author = "Kolachina, Prasanth and
Ranta, Aarne",
editor = "Hartmann, Mareike and
Plank, Barbara",
booktitle = "Proceedings of the 22nd Nordic Conference on Computational Linguistics",
month = sep # "{--}" # oct,
year = "2019",
address = "Turku, Finland",
publisher = {Link{\"o}ping University Electronic Press},
url = "https://aclanthology.org/W19-6102",
pages = "15--24",
abstract = "Standard approaches to treebanking traditionally employ a waterfall model (Sommerville, 2010), where annotation guidelines guide the annotation process and insights from the annotation process in turn lead to subsequent changes in the annotation guidelines. This process remains a very expensive step in creating linguistic resources for a target language, necessitates both linguistic expertise and manual effort to develop the annotations and is subject to inconsistencies in the annotation due to human errors. In this paper, we propose an alternative approach to treebanking{---}one that requires writing grammars. This approach is motivated specifically in the context of Universal Dependencies, an effort to develop uniform and cross-lingually consistent treebanks across multiple languages. We show here that a bootstrapping approach to treebanking via interlingual grammars is plausible and useful in a process where grammar engineering and treebanking are jointly pursued when creating resources for the target language. We demonstrate the usefulness of synthetic treebanks in the task of delexicalized parsing. Our experiments reveal that simple models for treebank generation are cheaper than human annotated treebanks, especially in the lower ends of the learning curves for delexicalized parsing, which is relevant in particular in the context of low-resource languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kolachina-ranta-2019-bootstrapping">
<titleInfo>
<title>Bootstrapping UD treebanks for Delexicalized Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prasanth</namePart>
<namePart type="family">Kolachina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aarne</namePart>
<namePart type="family">Ranta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-sep–oct</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Nordic Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mareike</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press</publisher>
<place>
<placeTerm type="text">Turku, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Standard approaches to treebanking traditionally employ a waterfall model (Sommerville, 2010), where annotation guidelines guide the annotation process and insights from the annotation process in turn lead to subsequent changes in the annotation guidelines. This process remains a very expensive step in creating linguistic resources for a target language, necessitates both linguistic expertise and manual effort to develop the annotations and is subject to inconsistencies in the annotation due to human errors. In this paper, we propose an alternative approach to treebanking—one that requires writing grammars. This approach is motivated specifically in the context of Universal Dependencies, an effort to develop uniform and cross-lingually consistent treebanks across multiple languages. We show here that a bootstrapping approach to treebanking via interlingual grammars is plausible and useful in a process where grammar engineering and treebanking are jointly pursued when creating resources for the target language. We demonstrate the usefulness of synthetic treebanks in the task of delexicalized parsing. Our experiments reveal that simple models for treebank generation are cheaper than human annotated treebanks, especially in the lower ends of the learning curves for delexicalized parsing, which is relevant in particular in the context of low-resource languages.</abstract>
<identifier type="citekey">kolachina-ranta-2019-bootstrapping</identifier>
<location>
<url>https://aclanthology.org/W19-6102</url>
</location>
<part>
<date>2019-sep–oct</date>
<extent unit="page">
<start>15</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bootstrapping UD treebanks for Delexicalized Parsing
%A Kolachina, Prasanth
%A Ranta, Aarne
%Y Hartmann, Mareike
%Y Plank, Barbara
%S Proceedings of the 22nd Nordic Conference on Computational Linguistics
%D 2019
%8 sep–oct
%I Linköping University Electronic Press
%C Turku, Finland
%F kolachina-ranta-2019-bootstrapping
%X Standard approaches to treebanking traditionally employ a waterfall model (Sommerville, 2010), where annotation guidelines guide the annotation process and insights from the annotation process in turn lead to subsequent changes in the annotation guidelines. This process remains a very expensive step in creating linguistic resources for a target language, necessitates both linguistic expertise and manual effort to develop the annotations and is subject to inconsistencies in the annotation due to human errors. In this paper, we propose an alternative approach to treebanking—one that requires writing grammars. This approach is motivated specifically in the context of Universal Dependencies, an effort to develop uniform and cross-lingually consistent treebanks across multiple languages. We show here that a bootstrapping approach to treebanking via interlingual grammars is plausible and useful in a process where grammar engineering and treebanking are jointly pursued when creating resources for the target language. We demonstrate the usefulness of synthetic treebanks in the task of delexicalized parsing. Our experiments reveal that simple models for treebank generation are cheaper than human annotated treebanks, especially in the lower ends of the learning curves for delexicalized parsing, which is relevant in particular in the context of low-resource languages.
%U https://aclanthology.org/W19-6102
%P 15-24
Markdown (Informal)
[Bootstrapping UD treebanks for Delexicalized Parsing](https://aclanthology.org/W19-6102) (Kolachina & Ranta, NoDaLiDa 2019)
ACL