@inproceedings{zinsmeister-etal-2014-adapting,
title = "Adapting a part-of-speech tagset to non-standard text: The case of {STTS}",
author = "Zinsmeister, Heike and
Heid, Ulrich and
Beck, Kathrin",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/721_Paper.pdf",
pages = "4097--4104",
abstract = {The Stuttgart-T{\"u}bingen TagSet (STTS) is a de-facto standard for the part-of-speech tagging of German texts. Since its first publication in 1995, STTS has been used in a variety of annotation projects, some of which have adapted the tagset slightly for their specific needs. Recently, the focus of many projects has shifted from the analysis of newspaper text to that of non-standard varieties such as user-generated content, historical texts, and learner language. These text types contain linguistic phenomena that are missing from or are only suboptimally covered by STTS; in a community effort, German NLP researchers have therefore proposed additions to and modifications of the tagset that will handle these phenomena more appropriately. In addition, they have discussed alternative ways of tag assignment in terms of bipartite tags (stem, token) for historical texts and tripartite tags (lexicon, morphology, distribution) for learner texts. In this article, we report on this ongoing activity, addressing methodological issues and discussing selected phenomena and their treatment in the tagset adaptation process.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zinsmeister-etal-2014-adapting">
<titleInfo>
<title>Adapting a part-of-speech tagset to non-standard text: The case of STTS</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heike</namePart>
<namePart type="family">Zinsmeister</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Heid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathrin</namePart>
<namePart type="family">Beck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Stuttgart-Tübingen TagSet (STTS) is a de-facto standard for the part-of-speech tagging of German texts. Since its first publication in 1995, STTS has been used in a variety of annotation projects, some of which have adapted the tagset slightly for their specific needs. Recently, the focus of many projects has shifted from the analysis of newspaper text to that of non-standard varieties such as user-generated content, historical texts, and learner language. These text types contain linguistic phenomena that are missing from or are only suboptimally covered by STTS; in a community effort, German NLP researchers have therefore proposed additions to and modifications of the tagset that will handle these phenomena more appropriately. In addition, they have discussed alternative ways of tag assignment in terms of bipartite tags (stem, token) for historical texts and tripartite tags (lexicon, morphology, distribution) for learner texts. In this article, we report on this ongoing activity, addressing methodological issues and discussing selected phenomena and their treatment in the tagset adaptation process.</abstract>
<identifier type="citekey">zinsmeister-etal-2014-adapting</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/721_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>4097</start>
<end>4104</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adapting a part-of-speech tagset to non-standard text: The case of STTS
%A Zinsmeister, Heike
%A Heid, Ulrich
%A Beck, Kathrin
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F zinsmeister-etal-2014-adapting
%X The Stuttgart-Tübingen TagSet (STTS) is a de-facto standard for the part-of-speech tagging of German texts. Since its first publication in 1995, STTS has been used in a variety of annotation projects, some of which have adapted the tagset slightly for their specific needs. Recently, the focus of many projects has shifted from the analysis of newspaper text to that of non-standard varieties such as user-generated content, historical texts, and learner language. These text types contain linguistic phenomena that are missing from or are only suboptimally covered by STTS; in a community effort, German NLP researchers have therefore proposed additions to and modifications of the tagset that will handle these phenomena more appropriately. In addition, they have discussed alternative ways of tag assignment in terms of bipartite tags (stem, token) for historical texts and tripartite tags (lexicon, morphology, distribution) for learner texts. In this article, we report on this ongoing activity, addressing methodological issues and discussing selected phenomena and their treatment in the tagset adaptation process.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/721_Paper.pdf
%P 4097-4104
Markdown (Informal)
[Adapting a part-of-speech tagset to non-standard text: The case of STTS](http://www.lrec-conf.org/proceedings/lrec2014/pdf/721_Paper.pdf) (Zinsmeister et al., LREC 2014)
ACL