@inproceedings{klatt-2006-corpus,
title = "A Corpus-based Approach to the Interpretation of Unknown Words with an Application to {G}erman",
author = "Klatt, Stefan",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/552_pdf.pdf",
abstract = "Usually a high portion of the different word forms in a corpus receive no reading by the lexical and/or morphological analysis. These unknown words constitute a huge problem for NLP analysis tasks like POS-tagging or syntactic parsing. We present a parameterizable (in principle language-independent) corpus-based approach for the interpretation of unknown words that only needs a tokenized corpus and can be used in both offline and online applications. In combination with a few linguistic (language-dependent) rules unknown verbs, adjectives, nouns, multiword units etc. are identified. Depending on the recognized word class(es), more detailed morphosyntactic and semantic information is additionally identified in opposite to the majority of other unknown word guessing methods, which only uses a very narrow decision window to assign an unknown word its correct reading respective Part-of-Speech tag in a given text. We tested our approach by experiments with German data and received very promising results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="klatt-2006-corpus">
<titleInfo>
<title>A Corpus-based Approach to the Interpretation of Unknown Words with an Application to German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Klatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Usually a high portion of the different word forms in a corpus receive no reading by the lexical and/or morphological analysis. These unknown words constitute a huge problem for NLP analysis tasks like POS-tagging or syntactic parsing. We present a parameterizable (in principle language-independent) corpus-based approach for the interpretation of unknown words that only needs a tokenized corpus and can be used in both offline and online applications. In combination with a few linguistic (language-dependent) rules unknown verbs, adjectives, nouns, multiword units etc. are identified. Depending on the recognized word class(es), more detailed morphosyntactic and semantic information is additionally identified in opposite to the majority of other unknown word guessing methods, which only uses a very narrow decision window to assign an unknown word its correct reading respective Part-of-Speech tag in a given text. We tested our approach by experiments with German data and received very promising results.</abstract>
<identifier type="citekey">klatt-2006-corpus</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/552_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Corpus-based Approach to the Interpretation of Unknown Words with an Application to German
%A Klatt, Stefan
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F klatt-2006-corpus
%X Usually a high portion of the different word forms in a corpus receive no reading by the lexical and/or morphological analysis. These unknown words constitute a huge problem for NLP analysis tasks like POS-tagging or syntactic parsing. We present a parameterizable (in principle language-independent) corpus-based approach for the interpretation of unknown words that only needs a tokenized corpus and can be used in both offline and online applications. In combination with a few linguistic (language-dependent) rules unknown verbs, adjectives, nouns, multiword units etc. are identified. Depending on the recognized word class(es), more detailed morphosyntactic and semantic information is additionally identified in opposite to the majority of other unknown word guessing methods, which only uses a very narrow decision window to assign an unknown word its correct reading respective Part-of-Speech tag in a given text. We tested our approach by experiments with German data and received very promising results.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/552_pdf.pdf
Markdown (Informal)
[A Corpus-based Approach to the Interpretation of Unknown Words with an Application to German](http://www.lrec-conf.org/proceedings/lrec2006/pdf/552_pdf.pdf) (Klatt, LREC 2006)
ACL