@inproceedings{straka-etal-2019-udpipe,
title = "{UDP}ipe at {SIGMORPHON} 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging",
author = "Straka, Milan and
Strakov{\'a}, Jana and
Hajic, Jan",
editor = "Nicolai, Garrett and
Cotterell, Ryan",
booktitle = "Proceedings of the 16th Workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4212",
doi = "10.18653/v1/W19-4212",
pages = "95--103",
abstract = "We present our contribution to the SIGMORPHON 2019 Shared Task: Crosslinguality and Context in Morphology, Task 2: contextual morphological analysis and lemmatization. We submitted a modification of the UDPipe 2.0, one of best-performing systems of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies and an overall winner of the The 2018 Shared Task on Extrinsic Parser Evaluation. As our first improvement, we use the pretrained contextualized embeddings (BERT) as additional inputs to the network; secondly, we use individual morphological features as regularization; and finally, we merge the selected corpora of the same language. In the lemmatization task, our system exceeds all the submitted systems by a wide margin with lemmatization accuracy 95.78 (second best was 95.00, third 94.46). In the morphological analysis, our system placed tightly second: our morphological analysis accuracy was 93.19, the winning system{'}s 93.23.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="straka-etal-2019-udpipe">
<titleInfo>
<title>UDPipe at SIGMORPHON 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Milan</namePart>
<namePart type="family">Straka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jana</namePart>
<namePart type="family">Straková</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Hajic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our contribution to the SIGMORPHON 2019 Shared Task: Crosslinguality and Context in Morphology, Task 2: contextual morphological analysis and lemmatization. We submitted a modification of the UDPipe 2.0, one of best-performing systems of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies and an overall winner of the The 2018 Shared Task on Extrinsic Parser Evaluation. As our first improvement, we use the pretrained contextualized embeddings (BERT) as additional inputs to the network; secondly, we use individual morphological features as regularization; and finally, we merge the selected corpora of the same language. In the lemmatization task, our system exceeds all the submitted systems by a wide margin with lemmatization accuracy 95.78 (second best was 95.00, third 94.46). In the morphological analysis, our system placed tightly second: our morphological analysis accuracy was 93.19, the winning system’s 93.23.</abstract>
<identifier type="citekey">straka-etal-2019-udpipe</identifier>
<identifier type="doi">10.18653/v1/W19-4212</identifier>
<location>
<url>https://aclanthology.org/W19-4212</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>95</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UDPipe at SIGMORPHON 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging
%A Straka, Milan
%A Straková, Jana
%A Hajic, Jan
%Y Nicolai, Garrett
%Y Cotterell, Ryan
%S Proceedings of the 16th Workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F straka-etal-2019-udpipe
%X We present our contribution to the SIGMORPHON 2019 Shared Task: Crosslinguality and Context in Morphology, Task 2: contextual morphological analysis and lemmatization. We submitted a modification of the UDPipe 2.0, one of best-performing systems of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies and an overall winner of the The 2018 Shared Task on Extrinsic Parser Evaluation. As our first improvement, we use the pretrained contextualized embeddings (BERT) as additional inputs to the network; secondly, we use individual morphological features as regularization; and finally, we merge the selected corpora of the same language. In the lemmatization task, our system exceeds all the submitted systems by a wide margin with lemmatization accuracy 95.78 (second best was 95.00, third 94.46). In the morphological analysis, our system placed tightly second: our morphological analysis accuracy was 93.19, the winning system’s 93.23.
%R 10.18653/v1/W19-4212
%U https://aclanthology.org/W19-4212
%U https://doi.org/10.18653/v1/W19-4212
%P 95-103
Markdown (Informal)
[UDPipe at SIGMORPHON 2019: Contextualized Embeddings, Regularization with Morphological Categories, Corpora Merging](https://aclanthology.org/W19-4212) (Straka et al., ACL 2019)
ACL