@inproceedings{janssen-2012-neotag,
title = "{N}eo{T}ag: a {POS} Tagger for Grammatical Neologism Detection",
author = "Janssen, Maarten",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}`12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L12-1653/",
pages = "2118--2124",
abstract = "POS Taggers typically fail to correctly tag grammatical neologisms: for known words, a tagger will only take known tags into account, and hence discard any possibility that the word is used in a novel or deviant grammatical category in the text at hand. Grammatical neologisms are relatively rare, and therefore do not pose a significant problem for the overall performance of a tagger. But for studies on neologisms and grammaticalization processes, this makes traditional taggers rather unfit. This article describes a modified POS tagger that explicitly considers new tags for known words, hence making it better fit for neologism research. This tagger, called NeoTag, has an overall accuracy that is comparable to other taggers, but scores much better for grammatical neologisms. To achieve this, the tagger applies a system of {\{}{\textbackslash}em lexical smoothing{\}}, which adds new categories to known words based on known homographs. NeoTag also lemmatizes words as part of the tagging system, achieving a high accuracy on lemmatization for both known and unknown words, without the need for an external lexicon. The use of NeoTag is not restricted to grammatical neologism detection, and it can be used for other purposes as well."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="janssen-2012-neotag">
<titleInfo>
<title>NeoTag: a POS Tagger for Grammatical Neologism Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maarten</namePart>
<namePart type="family">Janssen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC‘12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>POS Taggers typically fail to correctly tag grammatical neologisms: for known words, a tagger will only take known tags into account, and hence discard any possibility that the word is used in a novel or deviant grammatical category in the text at hand. Grammatical neologisms are relatively rare, and therefore do not pose a significant problem for the overall performance of a tagger. But for studies on neologisms and grammaticalization processes, this makes traditional taggers rather unfit. This article describes a modified POS tagger that explicitly considers new tags for known words, hence making it better fit for neologism research. This tagger, called NeoTag, has an overall accuracy that is comparable to other taggers, but scores much better for grammatical neologisms. To achieve this, the tagger applies a system of {\textbackslashem lexical smoothing}, which adds new categories to known words based on known homographs. NeoTag also lemmatizes words as part of the tagging system, achieving a high accuracy on lemmatization for both known and unknown words, without the need for an external lexicon. The use of NeoTag is not restricted to grammatical neologism detection, and it can be used for other purposes as well.</abstract>
<identifier type="citekey">janssen-2012-neotag</identifier>
<location>
<url>https://aclanthology.org/L12-1653/</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2118</start>
<end>2124</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NeoTag: a POS Tagger for Grammatical Neologism Detection
%A Janssen, Maarten
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC‘12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F janssen-2012-neotag
%X POS Taggers typically fail to correctly tag grammatical neologisms: for known words, a tagger will only take known tags into account, and hence discard any possibility that the word is used in a novel or deviant grammatical category in the text at hand. Grammatical neologisms are relatively rare, and therefore do not pose a significant problem for the overall performance of a tagger. But for studies on neologisms and grammaticalization processes, this makes traditional taggers rather unfit. This article describes a modified POS tagger that explicitly considers new tags for known words, hence making it better fit for neologism research. This tagger, called NeoTag, has an overall accuracy that is comparable to other taggers, but scores much better for grammatical neologisms. To achieve this, the tagger applies a system of {\textbackslashem lexical smoothing}, which adds new categories to known words based on known homographs. NeoTag also lemmatizes words as part of the tagging system, achieving a high accuracy on lemmatization for both known and unknown words, without the need for an external lexicon. The use of NeoTag is not restricted to grammatical neologism detection, and it can be used for other purposes as well.
%U https://aclanthology.org/L12-1653/
%P 2118-2124
Markdown (Informal)
[NeoTag: a POS Tagger for Grammatical Neologism Detection](https://aclanthology.org/L12-1653/) (Janssen, LREC 2012)
ACL