@inproceedings{dalianis-jongejan-2006-hand,
title = "Hand-crafted versus Machine-learned Inflectional Rules: The Euroling-{S}ite{S}eeker Stemmer and {CST}{'}s Lemmatiser",
author = "Dalianis, Hercules and
Jongejan, Bart",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/97_pdf.pdf",
abstract = "The Euroling stemmer is developed for a commercial web site and intranet search engine called SiteSeeker. SiteSeeker is basically used in the Swedish domain but to some extent also for the English domain. CST's lemmatiser comes from the Center for Language Technology, University of Copenhagen and was originally developed as a research prototype to create lemmatisation rules from training data. In this paper we compare the performance of the stemmer that uses handcrafted rules for Swedish, Danish and Norwegian as well one stemmer for Greek with CST's lemmatiser that uses training data to extract lemmatisation rules for Swedish, Danish, Norwegian and Greek. The performances of the two approaches are about the same with around 10 percent errors. The handcrafted rule based stemmer techniques are easy to get started with if the programmer has the proper linguistic knowledge. The machine trained sets of lemmatisation rules are very easy to produce without having linguistic knowledge given that one has correct training data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dalianis-jongejan-2006-hand">
<titleInfo>
<title>Hand-crafted versus Machine-learned Inflectional Rules: The Euroling-SiteSeeker Stemmer and CST’s Lemmatiser</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hercules</namePart>
<namePart type="family">Dalianis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bart</namePart>
<namePart type="family">Jongejan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Euroling stemmer is developed for a commercial web site and intranet search engine called SiteSeeker. SiteSeeker is basically used in the Swedish domain but to some extent also for the English domain. CST’s lemmatiser comes from the Center for Language Technology, University of Copenhagen and was originally developed as a research prototype to create lemmatisation rules from training data. In this paper we compare the performance of the stemmer that uses handcrafted rules for Swedish, Danish and Norwegian as well one stemmer for Greek with CST’s lemmatiser that uses training data to extract lemmatisation rules for Swedish, Danish, Norwegian and Greek. The performances of the two approaches are about the same with around 10 percent errors. The handcrafted rule based stemmer techniques are easy to get started with if the programmer has the proper linguistic knowledge. The machine trained sets of lemmatisation rules are very easy to produce without having linguistic knowledge given that one has correct training data.</abstract>
<identifier type="citekey">dalianis-jongejan-2006-hand</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/97_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hand-crafted versus Machine-learned Inflectional Rules: The Euroling-SiteSeeker Stemmer and CST’s Lemmatiser
%A Dalianis, Hercules
%A Jongejan, Bart
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F dalianis-jongejan-2006-hand
%X The Euroling stemmer is developed for a commercial web site and intranet search engine called SiteSeeker. SiteSeeker is basically used in the Swedish domain but to some extent also for the English domain. CST’s lemmatiser comes from the Center for Language Technology, University of Copenhagen and was originally developed as a research prototype to create lemmatisation rules from training data. In this paper we compare the performance of the stemmer that uses handcrafted rules for Swedish, Danish and Norwegian as well one stemmer for Greek with CST’s lemmatiser that uses training data to extract lemmatisation rules for Swedish, Danish, Norwegian and Greek. The performances of the two approaches are about the same with around 10 percent errors. The handcrafted rule based stemmer techniques are easy to get started with if the programmer has the proper linguistic knowledge. The machine trained sets of lemmatisation rules are very easy to produce without having linguistic knowledge given that one has correct training data.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/97_pdf.pdf
Markdown (Informal)
[Hand-crafted versus Machine-learned Inflectional Rules: The Euroling-SiteSeeker Stemmer and CST’s Lemmatiser](http://www.lrec-conf.org/proceedings/lrec2006/pdf/97_pdf.pdf) (Dalianis & Jongejan, LREC 2006)
ACL