@inproceedings{adolphs-2008-acquiring,
title = "Acquiring a Poor Man{'}s Inflectional Lexicon for {G}erman",
author = "Adolphs, Peter",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/867_paper.pdf",
abstract = "Many NLP modules and applications require the availability of a module for wide-coverage inflectional analysis. One way to obtain such analyses is to use a morphological analyser in combination with an inflectional lexicon. Since large text corpora nowadays are easily available and inflectional systems are in general well understood, it seems feasible to acquire lexical data from raw texts, guided by our knowledge of inflection. I present an acquisition method along these lines for German. The general idea can be roughly summarised as follows: first, generate a set of lexical entry hypotheses for each word-form in the corpus; then, select hypotheses that explain the word-forms found in the corpus best. To this end, I have turned an existing morphological grammar, cast in finite-state technology (Schmid et al. 2004), into a hypothesiser for lexical entries. Irregular forms are simply listed so that they do not interfere with the regular rules used in the hypothesiser. Running the hypothesiser on a text corpus yields a large number of lexical entry hypotheses. These are then ranked according to their validity with the help of a statistical model that is based on the number of attested and predicted word forms for each hypothesis.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="adolphs-2008-acquiring">
<titleInfo>
<title>Acquiring a Poor Man’s Inflectional Lexicon for German</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Adolphs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many NLP modules and applications require the availability of a module for wide-coverage inflectional analysis. One way to obtain such analyses is to use a morphological analyser in combination with an inflectional lexicon. Since large text corpora nowadays are easily available and inflectional systems are in general well understood, it seems feasible to acquire lexical data from raw texts, guided by our knowledge of inflection. I present an acquisition method along these lines for German. The general idea can be roughly summarised as follows: first, generate a set of lexical entry hypotheses for each word-form in the corpus; then, select hypotheses that explain the word-forms found in the corpus best. To this end, I have turned an existing morphological grammar, cast in finite-state technology (Schmid et al. 2004), into a hypothesiser for lexical entries. Irregular forms are simply listed so that they do not interfere with the regular rules used in the hypothesiser. Running the hypothesiser on a text corpus yields a large number of lexical entry hypotheses. These are then ranked according to their validity with the help of a statistical model that is based on the number of attested and predicted word forms for each hypothesis.</abstract>
<identifier type="citekey">adolphs-2008-acquiring</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/867_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Acquiring a Poor Man’s Inflectional Lexicon for German
%A Adolphs, Peter
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F adolphs-2008-acquiring
%X Many NLP modules and applications require the availability of a module for wide-coverage inflectional analysis. One way to obtain such analyses is to use a morphological analyser in combination with an inflectional lexicon. Since large text corpora nowadays are easily available and inflectional systems are in general well understood, it seems feasible to acquire lexical data from raw texts, guided by our knowledge of inflection. I present an acquisition method along these lines for German. The general idea can be roughly summarised as follows: first, generate a set of lexical entry hypotheses for each word-form in the corpus; then, select hypotheses that explain the word-forms found in the corpus best. To this end, I have turned an existing morphological grammar, cast in finite-state technology (Schmid et al. 2004), into a hypothesiser for lexical entries. Irregular forms are simply listed so that they do not interfere with the regular rules used in the hypothesiser. Running the hypothesiser on a text corpus yields a large number of lexical entry hypotheses. These are then ranked according to their validity with the help of a statistical model that is based on the number of attested and predicted word forms for each hypothesis.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/867_paper.pdf
Markdown (Informal)
[Acquiring a Poor Man’s Inflectional Lexicon for German](http://www.lrec-conf.org/proceedings/lrec2008/pdf/867_paper.pdf) (Adolphs, LREC 2008)
ACL