@inproceedings{novak-etal-2004-combining,
    title = "Combining Symbolic and Statistical Methods in Morphological Analysis and Unknown Word Guessing",
    author = "Nov{\'a}k, Attila  and
      Nagy, Viktor  and
      Oravecz, Csaba",
    editor = "Lino, Maria Teresa  and
      Xavier, Maria Francisca  and
      Ferreira, F{\'a}tima  and
      Costa, Rute  and
      Silva, Raquel",
    booktitle = "Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}{'}04)",
    month = may,
    year = "2004",
    address = "Lisbon, Portugal",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L04-1259/",
    abstract = "Highly inflectional/agglutinative languages like Hungarian typically feature possible word forms in such a magnitude that automatic methods that provide morphosyntactic annotation on the basis of some training corpus often face the problem of data sparseness. A possible solution to this problem is to apply a comprehensive morphological analyser, which is able to analyse almost all wordforms alleviating the problem of unseen tokens. However, although in a smaller number, there will still remain forms which are unknown even to the morphological analyzer and should be handled by some guesser mechanism. The paper will describe a hybrid method which combines symbolic and statistical information to provide lemmatization and suffix analyses for unknown word forms. Evaluation is carried out with respect to the induction of possible analyses and their respective lexical probabilities for unknown word forms in a part-of-speech tagging system."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="novak-etal-2004-combining">
    <titleInfo>
        <title>Combining Symbolic and Statistical Methods in Morphological Analysis and Unknown Word Guessing</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Attila</namePart>
        <namePart type="family">Novák</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Viktor</namePart>
        <namePart type="family">Nagy</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Csaba</namePart>
        <namePart type="family">Oravecz</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2004-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Maria</namePart>
            <namePart type="given">Teresa</namePart>
            <namePart type="family">Lino</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Maria</namePart>
            <namePart type="given">Francisca</namePart>
            <namePart type="family">Xavier</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Fátima</namePart>
            <namePart type="family">Ferreira</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Rute</namePart>
            <namePart type="family">Costa</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Raquel</namePart>
            <namePart type="family">Silva</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Lisbon, Portugal</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Highly inflectional/agglutinative languages like Hungarian typically feature possible word forms in such a magnitude that automatic methods that provide morphosyntactic annotation on the basis of some training corpus often face the problem of data sparseness. A possible solution to this problem is to apply a comprehensive morphological analyser, which is able to analyse almost all wordforms alleviating the problem of unseen tokens. However, although in a smaller number, there will still remain forms which are unknown even to the morphological analyzer and should be handled by some guesser mechanism. The paper will describe a hybrid method which combines symbolic and statistical information to provide lemmatization and suffix analyses for unknown word forms. Evaluation is carried out with respect to the induction of possible analyses and their respective lexical probabilities for unknown word forms in a part-of-speech tagging system.</abstract>
    <identifier type="citekey">novak-etal-2004-combining</identifier>
    <location>
        <url>https://aclanthology.org/L04-1259/</url>
    </location>
    <part>
        <date>2004-05</date>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Combining Symbolic and Statistical Methods in Morphological Analysis and Unknown Word Guessing
%A Novák, Attila
%A Nagy, Viktor
%A Oravecz, Csaba
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F novak-etal-2004-combining
%X Highly inflectional/agglutinative languages like Hungarian typically feature possible word forms in such a magnitude that automatic methods that provide morphosyntactic annotation on the basis of some training corpus often face the problem of data sparseness. A possible solution to this problem is to apply a comprehensive morphological analyser, which is able to analyse almost all wordforms alleviating the problem of unseen tokens. However, although in a smaller number, there will still remain forms which are unknown even to the morphological analyzer and should be handled by some guesser mechanism. The paper will describe a hybrid method which combines symbolic and statistical information to provide lemmatization and suffix analyses for unknown word forms. Evaluation is carried out with respect to the induction of possible analyses and their respective lexical probabilities for unknown word forms in a part-of-speech tagging system.
%U https://aclanthology.org/L04-1259/
Markdown (Informal)
[Combining Symbolic and Statistical Methods in Morphological Analysis and Unknown Word Guessing](https://aclanthology.org/L04-1259/) (Novák et al., LREC 2004)
ACL