@inproceedings{haselbach-heid-2010-development,
title = "The Development of a Morphosyntactic Tagset for {A}frikaans and its Use with Statistical Tagging",
author = "Haselbach, Boris and
Heid, Ulrich",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/318_Paper.pdf",
abstract = "In this paper, we present a morphosyntactic tagset for Afrikaans based on the guidelines developed by the Expert Advisory Group on Language Engineering Standards (EAGLES). We compare our slim yet expressive tagset, MAATS (Morphosyntactic AfrikAans TagSet), with an existing one which primarily focuses on a detailed morphosyntactic and semantic description of word forms. MAATS will primarily be used for the extraction of lexical data from large pos-tagged corpora. We not only focus on morphosyntactic properties but also on the processability with statistical tagging. We discuss the tagset design and motivate our classification of Afrikaans word forms, in particular we focus on the categorization of verbs and conjunctions. The complete tagset in presented and we briefly discuss each word class. In a case study with an Afrikaans newspaper corpus, we evaluate our tagset with four different statistical taggers. Despite a relatively small amount of training data, however with a large tagger lexicon, TnT-Tagger scores 97.05 {\%} accuracy. Additionally, we present some error sources and discuss future work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="haselbach-heid-2010-development">
<titleInfo>
<title>The Development of a Morphosyntactic Tagset for Afrikaans and its Use with Statistical Tagging</title>
</titleInfo>
<name type="personal">
<namePart type="given">Boris</namePart>
<namePart type="family">Haselbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Heid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present a morphosyntactic tagset for Afrikaans based on the guidelines developed by the Expert Advisory Group on Language Engineering Standards (EAGLES). We compare our slim yet expressive tagset, MAATS (Morphosyntactic AfrikAans TagSet), with an existing one which primarily focuses on a detailed morphosyntactic and semantic description of word forms. MAATS will primarily be used for the extraction of lexical data from large pos-tagged corpora. We not only focus on morphosyntactic properties but also on the processability with statistical tagging. We discuss the tagset design and motivate our classification of Afrikaans word forms, in particular we focus on the categorization of verbs and conjunctions. The complete tagset in presented and we briefly discuss each word class. In a case study with an Afrikaans newspaper corpus, we evaluate our tagset with four different statistical taggers. Despite a relatively small amount of training data, however with a large tagger lexicon, TnT-Tagger scores 97.05 % accuracy. Additionally, we present some error sources and discuss future work.</abstract>
<identifier type="citekey">haselbach-heid-2010-development</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/318_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Development of a Morphosyntactic Tagset for Afrikaans and its Use with Statistical Tagging
%A Haselbach, Boris
%A Heid, Ulrich
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F haselbach-heid-2010-development
%X In this paper, we present a morphosyntactic tagset for Afrikaans based on the guidelines developed by the Expert Advisory Group on Language Engineering Standards (EAGLES). We compare our slim yet expressive tagset, MAATS (Morphosyntactic AfrikAans TagSet), with an existing one which primarily focuses on a detailed morphosyntactic and semantic description of word forms. MAATS will primarily be used for the extraction of lexical data from large pos-tagged corpora. We not only focus on morphosyntactic properties but also on the processability with statistical tagging. We discuss the tagset design and motivate our classification of Afrikaans word forms, in particular we focus on the categorization of verbs and conjunctions. The complete tagset in presented and we briefly discuss each word class. In a case study with an Afrikaans newspaper corpus, we evaluate our tagset with four different statistical taggers. Despite a relatively small amount of training data, however with a large tagger lexicon, TnT-Tagger scores 97.05 % accuracy. Additionally, we present some error sources and discuss future work.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/318_Paper.pdf
Markdown (Informal)
[The Development of a Morphosyntactic Tagset for Afrikaans and its Use with Statistical Tagging](http://www.lrec-conf.org/proceedings/lrec2010/pdf/318_Paper.pdf) (Haselbach & Heid, LREC 2010)
ACL