@article{ravi-etal-2014-parallel,
title = "Parallel Algorithms for Unsupervised Tagging",
author = "Ravi, Sujith and
Vassilvitskii, Sergei and
Rastogi, Vibhor",
editor = "Lin, Dekang and
Collins, Michael and
Lee, Lillian",
journal = "Transactions of the Association for Computational Linguistics",
volume = "2",
year = "2014",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q14-1009",
doi = "10.1162/tacl_a_00169",
pages = "105--118",
abstract = "We propose a new method for unsupervised tagging that finds minimal models which are then further improved by Expectation Maximization training. In contrast to previous approaches that rely on manually specified and multi-step heuristics for model minimization, our approach is a simple greedy approximation algorithm DMLC (Distributed-Minimum-Label-Cover) that solves this objective in a single step. We extend the method and show how to efficiently parallelize the algorithm on modern parallel computing platforms while preserving approximation guarantees. The new method easily scales to large data and grammar sizes, overcoming the memory bottleneck in previous approaches. We demonstrate the power of the new algorithm by evaluating on various sequence labeling tasks: Part-of-Speech tagging for multiple languages (including low-resource languages), with complete and incomplete dictionaries, and supertagging, a complex sequence labeling task, where the grammar size alone can grow to millions of entries. Our results show that for all of these settings, our method achieves state-of-the-art scalable performance that yields high quality tagging outputs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ravi-etal-2014-parallel">
<titleInfo>
<title>Parallel Algorithms for Unsupervised Tagging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergei</namePart>
<namePart type="family">Vassilivitskii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vibhor</namePart>
<namePart type="family">Rastogi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We propose a new method for unsupervised tagging that finds minimal models which are then further improved by Expectation Maximization training. In contrast to previous approaches that rely on manually specified and multi-step heuristics for model minimization, our approach is a simple greedy approximation algorithm DMLC (Distributed-Minimum-Label-Cover) that solves this objective in a single step. We extend the method and show how to efficiently parallelize the algorithm on modern parallel computing platforms while preserving approximation guarantees. The new method easily scales to large data and grammar sizes, overcoming the memory bottleneck in previous approaches. We demonstrate the power of the new algorithm by evaluating on various sequence labeling tasks: Part-of-Speech tagging for multiple languages (including low-resource languages), with complete and incomplete dictionaries, and supertagging, a complex sequence labeling task, where the grammar size alone can grow to millions of entries. Our results show that for all of these settings, our method achieves state-of-the-art scalable performance that yields high quality tagging outputs.</abstract>
<identifier type="citekey">ravi-etal-2014-parallel</identifier>
<identifier type="doi">10.1162/tacl_a_00169</identifier>
<location>
<url>https://aclanthology.org/Q14-1009</url>
</location>
<part>
<date>2014</date>
<detail type="volume"><number>2</number></detail>
<extent unit="page">
<start>105</start>
<end>118</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Parallel Algorithms for Unsupervised Tagging
%A Ravi, Sujith
%A Vassilvitskii, Sergei
%A Rastogi, Vibhor
%J Transactions of the Association for Computational Linguistics
%D 2014
%V 2
%I MIT Press
%C Cambridge, MA
%F ravi-etal-2014-parallel
%X We propose a new method for unsupervised tagging that finds minimal models which are then further improved by Expectation Maximization training. In contrast to previous approaches that rely on manually specified and multi-step heuristics for model minimization, our approach is a simple greedy approximation algorithm DMLC (Distributed-Minimum-Label-Cover) that solves this objective in a single step. We extend the method and show how to efficiently parallelize the algorithm on modern parallel computing platforms while preserving approximation guarantees. The new method easily scales to large data and grammar sizes, overcoming the memory bottleneck in previous approaches. We demonstrate the power of the new algorithm by evaluating on various sequence labeling tasks: Part-of-Speech tagging for multiple languages (including low-resource languages), with complete and incomplete dictionaries, and supertagging, a complex sequence labeling task, where the grammar size alone can grow to millions of entries. Our results show that for all of these settings, our method achieves state-of-the-art scalable performance that yields high quality tagging outputs.
%R 10.1162/tacl_a_00169
%U https://aclanthology.org/Q14-1009
%U https://doi.org/10.1162/tacl_a_00169
%P 105-118
Markdown (Informal)
[Parallel Algorithms for Unsupervised Tagging](https://aclanthology.org/Q14-1009) (Ravi et al., TACL 2014)
ACL
Sujith Ravi, Sergei Vassilvitskii, and Vibhor Rastogi. 2014. Parallel Algorithms for Unsupervised Tagging. Transactions of the Association for Computational Linguistics, 2:105–118.