% Conference paper (NoDaLiDa 2019). The month range is joined with an ASCII "--"
% so the entry stays 7-bit clean; {\TH} is the LaTeX macro for capital thorn
% (requires the T1 font encoding in the consuming document).
@inproceedings{ingolfsdottir-etal-2019-towards,
    title = "Towards High Accuracy Named Entity Recognition for {Icelandic}",
    author = "Ing{\'o}lfsd{\'o}ttir, Svanhv{\'i}t Lilja  and
      {\TH}orsteinsson, Sigurj{\'o}n  and
      Loftsson, Hrafn",
    editor = "Hartmann, Mareike  and
      Plank, Barbara",
    booktitle = "Proceedings of the 22nd Nordic Conference on Computational Linguistics",
    month = sep # "--" # oct,
    year = "2019",
    address = "Turku, Finland",
    publisher = {Link{\"o}ping University Electronic Press},
    url = "https://aclanthology.org/W19-6142/",
    pages = "363--369",
    abstract = "We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3{\%} when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80{\%} of MIM-GOLD has not yet been used for training."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ingolfsdottir-etal-2019-towards">
    <titleInfo>
        <title>Towards High Accuracy Named Entity Recognition for Icelandic</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Svanhvít</namePart>
        <namePart type="given">Lilja</namePart>
        <namePart type="family">Ingólfsdóttir</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sigurjón</namePart>
        <namePart type="family">Þorsteinsson</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Hrafn</namePart>
        <namePart type="family">Loftsson</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2019-sep–oct</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 22nd Nordic Conference on Computational Linguistics</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Mareike</namePart>
            <namePart type="family">Hartmann</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Barbara</namePart>
            <namePart type="family">Plank</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Linköping University Electronic Press</publisher>
            <place>
                <placeTerm type="text">Turku, Finland</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3% when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80% of MIM-GOLD has not yet been used for training.</abstract>
    <identifier type="citekey">ingolfsdottir-etal-2019-towards</identifier>
    <location>
        <url>https://aclanthology.org/W19-6142/</url>
    </location>
    <part>
        <date>2019-sep–oct</date>
        <extent unit="page">
            <start>363</start>
            <end>369</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards High Accuracy Named Entity Recognition for Icelandic
%A Ingólfsdóttir, Svanhvít Lilja
%A Þorsteinsson, Sigurjón
%A Loftsson, Hrafn
%Y Hartmann, Mareike
%Y Plank, Barbara
%S Proceedings of the 22nd Nordic Conference on Computational Linguistics
%D 2019
%8 sep–oct
%I Linköping University Electronic Press
%C Turku, Finland
%F ingolfsdottir-etal-2019-towards
%X We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3% when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80% of MIM-GOLD has not yet been used for training.
%U https://aclanthology.org/W19-6142/
%P 363-369
Markdown (Informal)
[Towards High Accuracy Named Entity Recognition for Icelandic](https://aclanthology.org/W19-6142/) (Ingólfsdóttir et al., NoDaLiDa 2019)
ACL