% Conference paper from the NoDaLiDa 2023 proceedings (ACL Anthology export).
% Values are brace-delimited throughout; case-sensitive words in the title
% and booktitle are protected as whole words so styles cannot downcase them.
@inproceedings{dorkin-sirts-2023-comparison,
    title     = {Comparison of Current Approaches to Lemmatization: A Case Study in {Estonian}},
    author    = {Dorkin, Aleksei and
                 Sirts, Kairit},
    editor    = {Alum{\"a}e, Tanel and
                 Fishel, Mark},
    booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics ({NoDaLiDa})},
    month     = may,
    year      = {2023},
    address   = {T{\'o}rshavn, Faroe Islands},
    publisher = {University of Tartu Library},
    url       = {https://aclanthology.org/2023.nodalida-1.28},
    pages     = {280--285},
    abstract  = {This study evaluates three different lemmatization approaches to Estonian{---}Generative character-level models, Pattern-based word-level classification models, and rule-based morphological analysis. According to our experiments, a significantly smaller Generative model consistently outperforms the Pattern-based classification model based on EstBERT. Additionally, we observe a relatively small overlap in errors made by all three models, indicating that an ensemble of different approaches could lead to improvements.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for one conference paper. The paper's own metadata sits
     directly under <mods>; the proceedings volume it appears in (title,
     editors, publisher, place) is described by <relatedItem type="host">. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dorkin-sirts-2023-comparison">
    <titleInfo>
      <title>Comparison of Current Approaches to Lemmatization: A Case Study in Estonian</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Aleksei</namePart>
      <namePart type="family">Dorkin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kairit</namePart>
      <namePart type="family">Sirts</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tanel</namePart>
        <namePart type="family">Alumäe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mark</namePart>
        <namePart type="family">Fishel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>University of Tartu Library</publisher>
        <place>
          <placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This study evaluates three different lemmatization approaches to Estonian—Generative character-level models, Pattern-based word-level classification models, and rule-based morphological analysis. According to our experiments, a significantly smaller Generative model consistently outperforms the Pattern-based classification model based on EstBERT. Additionally, we observe a relatively small overlap in errors made by all three models, indicating that an ensemble of different approaches could lead to improvements.</abstract>
    <identifier type="citekey">dorkin-sirts-2023-comparison</identifier>
    <location>
      <url>https://aclanthology.org/2023.nodalida-1.28</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>280</start>
        <end>285</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Comparison of Current Approaches to Lemmatization: A Case Study in Estonian
%A Dorkin, Aleksei
%A Sirts, Kairit
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F dorkin-sirts-2023-comparison
%X This study evaluates three different lemmatization approaches to Estonian—Generative character-level models, Pattern-based word-level classification models, and rule-based morphological analysis. According to our experiments, a significantly smaller Generative model consistently outperforms the Pattern-based classification model based on EstBERT. Additionally, we observe a relatively small overlap in errors made by all three models, indicating that an ensemble of different approaches could lead to improvements.
%U https://aclanthology.org/2023.nodalida-1.28
%P 280-285
Markdown (Informal)
[Comparison of Current Approaches to Lemmatization: A Case Study in Estonian](https://aclanthology.org/2023.nodalida-1.28) (Dorkin & Sirts, NoDaLiDa 2023)
ACL