@inproceedings{ingolfsdottir-etal-2019-towards,
title = "Towards High Accuracy Named Entity Recognition for {I}celandic",
author = "Ing{\'o}lfsd{\'o}ttir, Svanhv{\'\i}t Lilja and
{\TH}orsteinsson, Sigurj{\'o}n and
Loftsson, Hrafn",
editor = "Hartmann, Mareike and
Plank, Barbara",
booktitle = "Proceedings of the 22nd Nordic Conference on Computational Linguistics",
month = sep # "{--}" # oct,
year = "2019",
address = "Turku, Finland",
publisher = {Link{\"o}ping University Electronic Press},
url = "https://aclanthology.org/W19-6142",
pages = "363--369",
abstract = "We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3{\%} when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80{\%} of MIM-GOLD has not yet been used for training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ingolfsdottir-etal-2019-towards">
<titleInfo>
<title>Towards High Accuracy Named Entity Recognition for Icelandic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Svanhvít</namePart>
<namePart type="given">Lilja</namePart>
<namePart type="family">Ingólfsdóttir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sigurjón</namePart>
<namePart type="family">\THorsteinsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-sep–oct</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Nordic Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mareike</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Linköping University Electronic Press</publisher>
<place>
<placeTerm type="text">Turku, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3% when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80% of MIM-GOLD has not yet been used for training.</abstract>
<identifier type="citekey">ingolfsdottir-etal-2019-towards</identifier>
<location>
<url>https://aclanthology.org/W19-6142</url>
</location>
<part>
<date>2019-sep–oct</date>
<extent unit="page">
<start>363</start>
<end>369</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards High Accuracy Named Entity Recognition for Icelandic
%A Ingólfsdóttir, Svanhvít Lilja
%A \THorsteinsson, Sigurjón
%A Loftsson, Hrafn
%Y Hartmann, Mareike
%Y Plank, Barbara
%S Proceedings of the 22nd Nordic Conference on Computational Linguistics
%D 2019
%8 sep–oct
%I Linköping University Electronic Press
%C Turku, Finland
%F ingolfsdottir-etal-2019-towards
%X We report on work in progress which consists of annotating an Icelandic corpus for named entities (NEs) and using it for training a named entity recognizer based on a Bidirectional Long Short-Term Memory model. Currently, we have annotated 7,538 NEs appearing in the first 200,000 tokens of a 1 million token corpus, MIM-GOLD, originally developed for serving as a gold standard for part-of-speech tagging. Our best performing model, trained on this subset of MIM-GOLD, and enriched with external word embeddings, obtains an overall F1 score of 81.3% when categorizing NEs into the following four categories: persons, locations, organizations and miscellaneous. Our preliminary results are promising, especially given the fact that 80% of MIM-GOLD has not yet been used for training.
%U https://aclanthology.org/W19-6142
%P 363-369
Markdown (Informal)
[Towards High Accuracy Named Entity Recognition for Icelandic](https://aclanthology.org/W19-6142) (Ingólfsdóttir et al., NoDaLiDa 2019)
ACL