% ACL Anthology record R19-1133 (RANLP 2019 proceedings paper).
% Only {BiLSTM} is brace-protected in the title so styles can recase the rest.
@inproceedings{steingrimsson-etal-2019-augmenting,
    title     = {Augmenting a {BiLSTM} Tagger with a Morphological Lexicon and a Lexical Category Identification Step},
    author    = {Steingr{\'i}msson, Stein{\th}{\'o}r and
                 K{\'a}rason, {\"O}rvar and
                 Loftsson, Hrafn},
    editor    = {Mitkov, Ruslan and
                 Angelova, Galia},
    booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)},
    month     = sep,
    year      = 2019,
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd.},
    url       = {https://aclanthology.org/R19-1133/},
    doi       = {10.26615/978-954-452-056-4_133},
    pages     = {1161--1168},
    abstract  = {Previous work on using BiLSTM models for PoS tagging has primarily focused on small tagsets. We evaluate BiLSTM models for tagging Icelandic, a morphologically rich language, using a relatively large tagset. Our baseline BiLSTM model achieves higher accuracy than any other previously published tagger, when not taking advantage of a morphological lexicon. When we extend the model by incorporating such data, we outperform the earlier state-of-the-art results by a significant margin. We also report on work in progress that attempts to address the problem of data sparsity inherent to morphologically detailed, fine-grained tagsets. We experiment with training a separate model on only the lexical category and using the coarse-grained output tag as an input into the main model. This method further increases the accuracy and reduces the tagging errors by 21.3\% compared to previous state-of-the-art results. Finally, we train and test our tagger on a new gold standard for Icelandic.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="steingrimsson-etal-2019-augmenting">
<titleInfo>
<title>Augmenting a BiLSTM Tagger with a Morphological Lexicon and a Lexical Category Identification Step</title>
</titleInfo>
<name type="personal">
<namePart type="given">Steinþór</namePart>
<namePart type="family">Steingrímsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Örvar</namePart>
<namePart type="family">Kárason</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous work on using BiLSTM models for PoS tagging has primarily focused on small tagsets. We evaluate BiLSTM models for tagging Icelandic, a morphologically rich language, using a relatively large tagset. Our baseline BiLSTM model achieves higher accuracy than any other previously published tagger, when not taking advantage of a morphological lexicon. When we extend the model by incorporating such data, we outperform the earlier state-of-the-art results by a significant margin. We also report on work in progress that attempts to address the problem of data sparsity inherent to morphologically detailed, fine-grained tagsets. We experiment with training a separate model on only the lexical category and using the coarse-grained output tag as an input into the main model. This method further increases the accuracy and reduces the tagging errors by 21.3% compared to previous state-of-the-art results. Finally, we train and test our tagger on a new gold standard for Icelandic.</abstract>
<identifier type="citekey">steingrimsson-etal-2019-augmenting</identifier>
<identifier type="doi">10.26615/978-954-452-056-4_133</identifier>
<location>
<url>https://aclanthology.org/R19-1133/</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>1161</start>
<end>1168</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Augmenting a BiLSTM Tagger with a Morphological Lexicon and a Lexical Category Identification Step
%A Steingrímsson, Steinþór
%A Kárason, Örvar
%A Loftsson, Hrafn
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F steingrimsson-etal-2019-augmenting
%X Previous work on using BiLSTM models for PoS tagging has primarily focused on small tagsets. We evaluate BiLSTM models for tagging Icelandic, a morphologically rich language, using a relatively large tagset. Our baseline BiLSTM model achieves higher accuracy than any other previously published tagger, when not taking advantage of a morphological lexicon. When we extend the model by incorporating such data, we outperform the earlier state-of-the-art results by a significant margin. We also report on work in progress that attempts to address the problem of data sparsity inherent to morphologically detailed, fine-grained tagsets. We experiment with training a separate model on only the lexical category and using the coarse-grained output tag as an input into the main model. This method further increases the accuracy and reduces the tagging errors by 21.3% compared to previous state-of-the-art results. Finally, we train and test our tagger on a new gold standard for Icelandic.
%R 10.26615/978-954-452-056-4_133
%U https://aclanthology.org/R19-1133/
%U https://doi.org/10.26615/978-954-452-056-4_133
%P 1161-1168
Markdown (Informal)
[Augmenting a BiLSTM Tagger with a Morphological Lexicon and a Lexical Category Identification Step](https://aclanthology.org/R19-1133/) (Steingrímsson et al., RANLP 2019)
ACL