% Conference paper in the LREC 2020 proceedings (ACL Anthology record 2020.lrec-1.314).
% The proper noun in the title is braced as a whole word so that sentence-casing
% styles keep its capital without a brace group splitting the word.
@inproceedings{zueva-etal-2020-finite,
    title = {A Finite-State Morphological Analyser for {Evenki}},
    author = {Zueva, Anna and
      Kuznetsova, Anastasia and
      Tyers, Francis},
    editor = {Calzolari, Nicoletta and
      B{\'e}chet, Fr{\'e}d{\'e}ric and
      Blache, Philippe and
      Choukri, Khalid and
      Cieri, Christopher and
      Declerck, Thierry and
      Goggi, Sara and
      Isahara, Hitoshi and
      Maegaard, Bente and
      Mariani, Joseph and
      Mazo, H{\'e}l{\`e}ne and
      Moreno, Asuncion and
      Odijk, Jan and
      Piperidis, Stelios},
    booktitle = {Proceedings of the Twelfth Language Resources and Evaluation Conference},
    month = may,
    year = {2020},
    address = {Marseille, France},
    publisher = {European Language Resources Association},
    url = {https://aclanthology.org/2020.lrec-1.314},
    pages = {2581--2589},
    abstract = {It has been widely admitted that morphological analysis is an important step in automated text processing for morphologically rich languages. Evenki is a language with rich morphology, therefore a morphological analyser is highly desirable for processing Evenki texts and developing applications for Evenki. Although two morphological analysers for Evenki have already been developed, they are able to analyse less than a half of the available Evenki corpora. The aim of this paper is to create a new morphological analyser for Evenki. It is implemented using the Helsinki Finite-State Transducer toolkit (HFST). The lexc formalism is used to specify the morphotactic rules, which define the valid orderings of morphemes in a word. Morphophonological alternations and orthographic rules are described using the twol formalism. The lexicon is extracted from available machine-readable dictionaries. Since a part of the corpora belongs to texts in Evenki dialects, a version of the analyser with relaxed rules is developed for processing dialectal features. We evaluate the analyser on available Evenki corpora and estimate precision, recall and F-score. We obtain coverage scores of between 61{\%} and 87{\%} on the available Evenki corpora.},
    language = {English},
    isbn = {979-10-95546-34-4},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- Single MODS record for the paper; its ID is the same string as the citekey identifier further down. -->
<mods ID="zueva-etal-2020-finite">
<titleInfo>
<title>A Finite-State Morphological Analyser for Evenki</title>
</titleInfo>
<!-- Paper authors (three personal names with the "author" role). -->
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Zueva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Kuznetsova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year-month. -->
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Language is recorded twice: as free text and as an ISO 639-2/B code. -->
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<!-- Host item: the proceedings volume containing the paper, with its editors, publisher, place, genre and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<!-- Proceedings editors (personal names with the "editor" role). -->
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<!-- Publisher and place of the host volume. -->
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<!-- Abstract of the paper, stored as plain text. -->
<abstract>It has been widely admitted that morphological analysis is an important step in automated text processing for morphologically rich languages. Evenki is a language with rich morphology, therefore a morphological analyser is highly desirable for processing Evenki texts and developing applications for Evenki. Although two morphological analysers for Evenki have already been developed, they are able to analyse less than a half of the available Evenki corpora. The aim of this paper is to create a new morphological analyser for Evenki. It is implemented using the Helsinki Finite-State Transducer toolkit (HFST). The lexc formalism is used to specify the morphotactic rules, which define the valid orderings of morphemes in a word. Morphophonological alternations and orthographic rules are described using the twol formalism. The lexicon is extracted from available machine-readable dictionaries. Since a part of the corpora belongs to texts in Evenki dialects, a version of the analyser with relaxed rules is developed for processing dialectal features. We evaluate the analyser on available Evenki corpora and estimate precision, recall and F-score. We obtain coverage scores of between 61% and 87% on the available Evenki corpora.</abstract>
<!-- Citation key and landing-page URL for the paper. -->
<identifier type="citekey">zueva-etal-2020-finite</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.314</url>
</location>
<!-- Date and page extent of the paper within the host volume. -->
<part>
<date>2020-05</date>
<extent unit="page">
<start>2581</start>
<end>2589</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Finite-State Morphological Analyser for Evenki
%A Zueva, Anna
%A Kuznetsova, Anastasia
%A Tyers, Francis
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F zueva-etal-2020-finite
%X It has been widely admitted that morphological analysis is an important step in automated text processing for morphologically rich languages. Evenki is a language with rich morphology, therefore a morphological analyser is highly desirable for processing Evenki texts and developing applications for Evenki. Although two morphological analysers for Evenki have already been developed, they are able to analyse less than a half of the available Evenki corpora. The aim of this paper is to create a new morphological analyser for Evenki. It is implemented using the Helsinki Finite-State Transducer toolkit (HFST). The lexc formalism is used to specify the morphotactic rules, which define the valid orderings of morphemes in a word. Morphophonological alternations and orthographic rules are described using the twol formalism. The lexicon is extracted from available machine-readable dictionaries. Since a part of the corpora belongs to texts in Evenki dialects, a version of the analyser with relaxed rules is developed for processing dialectal features. We evaluate the analyser on available Evenki corpora and estimate precision, recall and F-score. We obtain coverage scores of between 61% and 87% on the available Evenki corpora.
%U https://aclanthology.org/2020.lrec-1.314
%P 2581-2589
Markdown (Informal)
[A Finite-State Morphological Analyser for Evenki](https://aclanthology.org/2020.lrec-1.314) (Zueva et al., LREC 2020)
ACL
- Anna Zueva, Anastasia Kuznetsova, and Francis Tyers. 2020. A Finite-State Morphological Analyser for Evenki. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 2581–2589, Marseille, France. European Language Resources Association.