@inproceedings{nicolai-yarowsky-2019-learning,
title = "Learning Morphosyntactic Analyzers from the {B}ible via Iterative Annotation Projection across 26 Languages",
author = "Nicolai, Garrett and
Yarowsky, David",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1172",
doi = "10.18653/v1/P19-1172",
pages = "1765--1774",
abstract = "A large percentage of computational tools are concentrated in a very small subset of the planet{'}s languages. Compounding the issue, many languages lack the high-quality linguistic annotation necessary for the construction of such tools with current machine learning methods. In this paper, we address both issues simultaneously: leveraging the high accuracy of English taggers and parsers, we project morphological information onto translations of the Bible in 26 varied test languages. Using an iterative discovery, constraint, and training process, we build inflectional lexica in the target languages. Through a combination of iteration, ensembling, and reranking, we see double-digit relative error reductions in lemmatization and morphological analysis over a strong initial system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nicolai-yarowsky-2019-learning">
<titleInfo>
<title>Learning Morphosyntactic Analyzers from the Bible via Iterative Annotation Projection across 26 Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Yarowsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A large percentage of computational tools are concentrated in a very small subset of the planet’s languages. Compounding the issue, many languages lack the high-quality linguistic annotation necessary for the construction of such tools with current machine learning methods. In this paper, we address both issues simultaneously: leveraging the high accuracy of English taggers and parsers, we project morphological information onto translations of the Bible in 26 varied test languages. Using an iterative discovery, constraint, and training process, we build inflectional lexica in the target languages. Through a combination of iteration, ensembling, and reranking, we see double-digit relative error reductions in lemmatization and morphological analysis over a strong initial system.</abstract>
<identifier type="citekey">nicolai-yarowsky-2019-learning</identifier>
<identifier type="doi">10.18653/v1/P19-1172</identifier>
<location>
<url>https://aclanthology.org/P19-1172</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>1765</start>
<end>1774</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Morphosyntactic Analyzers from the Bible via Iterative Annotation Projection across 26 Languages
%A Nicolai, Garrett
%A Yarowsky, David
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F nicolai-yarowsky-2019-learning
%X A large percentage of computational tools are concentrated in a very small subset of the planet’s languages. Compounding the issue, many languages lack the high-quality linguistic annotation necessary for the construction of such tools with current machine learning methods. In this paper, we address both issues simultaneously: leveraging the high accuracy of English taggers and parsers, we project morphological information onto translations of the Bible in 26 varied test languages. Using an iterative discovery, constraint, and training process, we build inflectional lexica in the target languages. Through a combination of iteration, ensembling, and reranking, we see double-digit relative error reductions in lemmatization and morphological analysis over a strong initial system.
%R 10.18653/v1/P19-1172
%U https://aclanthology.org/P19-1172
%U https://doi.org/10.18653/v1/P19-1172
%P 1765-1774
Markdown (Informal)
[Learning Morphosyntactic Analyzers from the Bible via Iterative Annotation Projection across 26 Languages](https://aclanthology.org/P19-1172) (Nicolai & Yarowsky, ACL 2019)
ACL