@inproceedings{hornby-etal-2017-corpus,
title = "Corpus Selection Approaches for Multilingual Parsing from Raw Text to {U}niversal {D}ependencies",
author = "Hornby, Ryan and
Taylor, Clark and
Park, Jungyeul",
editor = "Haji{\v{c}}, Jan and
Zeman, Dan",
booktitle = "Proceedings of the {C}o{NLL} 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/K17-3021",
doi = "10.18653/v1/K17-3021",
pages = "198--206",
abstract = {This paper describes UALing{'}s approach to the \textit{CoNLL 2017 UD Shared Task} using corpus selection techniques to reduce training data size. The methodology is simple: we use similarity measures to select a corpus from available training data (even from multiple corpora for surprise languages) and use the resulting corpus to complete the parsing task. The training and parsing is done with the baseline UDPipe system (Straka et al., 2016). While our approach reduces the size of training data significantly, it retains performance within 0.5{\%} of the baseline system. Due to the reduction in training data size, our system performs faster than the na{\"\i}ve, complete corpus method. Specifically, our system runs in less than 10 minutes, ranking it among the fastest entries for this task. Our system is available at \url{https://github.com/CoNLL-UD-2017/UALING}.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hornby-etal-2017-corpus">
<titleInfo>
<title>Corpus Selection Approaches for Multilingual Parsing from Raw Text to Universal Dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Hornby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clark</namePart>
<namePart type="family">Taylor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jungyeul</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Hajič</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Zeman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes UALing’s approach to the CoNLL 2017 UD Shared Task using corpus selection techniques to reduce training data size. The methodology is simple: we use similarity measures to select a corpus from available training data (even from multiple corpora for surprise languages) and use the resulting corpus to complete the parsing task. The training and parsing is done with the baseline UDPipe system (Straka et al., 2016). While our approach reduces the size of training data significantly, it retains performance within 0.5% of the baseline system. Due to the reduction in training data size, our system performs faster than the naïve, complete corpus method. Specifically, our system runs in less than 10 minutes, ranking it among the fastest entries for this task. Our system is available at https://github.com/CoNLL-UD-2017/UALING.</abstract>
<identifier type="citekey">hornby-etal-2017-corpus</identifier>
<identifier type="doi">10.18653/v1/K17-3021</identifier>
<location>
<url>https://aclanthology.org/K17-3021</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>198</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Selection Approaches for Multilingual Parsing from Raw Text to Universal Dependencies
%A Hornby, Ryan
%A Taylor, Clark
%A Park, Jungyeul
%Y Hajič, Jan
%Y Zeman, Dan
%S Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F hornby-etal-2017-corpus
%X This paper describes UALing’s approach to the CoNLL 2017 UD Shared Task using corpus selection techniques to reduce training data size. The methodology is simple: we use similarity measures to select a corpus from available training data (even from multiple corpora for surprise languages) and use the resulting corpus to complete the parsing task. The training and parsing is done with the baseline UDPipe system (Straka et al., 2016). While our approach reduces the size of training data significantly, it retains performance within 0.5% of the baseline system. Due to the reduction in training data size, our system performs faster than the naïve, complete corpus method. Specifically, our system runs in less than 10 minutes, ranking it among the fastest entries for this task. Our system is available at https://github.com/CoNLL-UD-2017/UALING.
%R 10.18653/v1/K17-3021
%U https://aclanthology.org/K17-3021
%U https://doi.org/10.18653/v1/K17-3021
%P 198-206
Markdown (Informal)
[Corpus Selection Approaches for Multilingual Parsing from Raw Text to Universal Dependencies](https://aclanthology.org/K17-3021) (Hornby et al., CoNLL 2017)
ACL