@inproceedings{rueter-etal-2024-erzya,
title = "On {E}rzya and {M}oksha Corpora and Analyzer Development, {ERME}-{PSLA} 1950s",
author = "Rueter, Jack and
Erina, Olga and
Kabaeva, Nadezhda",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
Pirinen, Flammie and
Macias, Melany and
Crespo Avila, Mario},
booktitle = "Proceedings of the 9th International Workshop on Computational Linguistics for Uralic Languages",
month = nov,
year = "2024",
address = "Helsinki, Finland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.iwclul-1.8",
pages = "67--75",
abstract = "This paper describes materials and annotation facilitation pertinent to the «Erzya-Moksha Electronic Resources and Linguistic Diversity» (EMERALD) project. It addresses work following the construction of finite-state analyzers for the Mordvin languages, the gathering of test corpora, and the development of metadata strategies for descriptive research. In this paper, we provide three descriptors for a set of new Erzya and Moksha research materials at the Language Bank of Finland. The descriptors illustrate (1) a low-annotation subcorpora set of the «Electronic Resources for Moksha and Erzya» (ERME); (2) the state of the open-source analyzers used in their automatic annotation, and (3) the development of metadata documentation for the «EMERALD» project, associated with this endeavor. Outcomes of the article include an introduction to new research materials, an illustration of the state of the Mordvin annotation pipeline, and perspectives for the further enhancement of the annotation pipeline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rueter-etal-2024-erzya">
<titleInfo>
<title>On Erzya and Moksha Corpora and Analyzer Development, ERME-PSLA 1950s</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Rueter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Erina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadezhda</namePart>
<namePart type="family">Kabaeva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th International Workshop on Computational Linguistics for Uralic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flammie</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Melany</namePart>
<namePart type="family">Macias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mario</namePart>
<namePart type="family">Crespo Avila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Helsinki, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes materials and annotation facilitation pertinent to the «Erzya-Moksha Electronic Resources and Linguistic Diversity» (EMERALD) project. It addresses work following the construction of finite-state analyzers for the Mordvin languages, the gathering of test corpora, and the development of metadata strategies for descriptive research. In this paper, we provide three descriptors for a set of new Erzya and Moksha research materials at the Language Bank of Finland. The descriptors illustrate (1) a low-annotation subcorpora set of the «Electronic Resources for Moksha and Erzya» (ERME); (2) the state of the open-source analyzers used in their automatic annotation, and (3) the development of metadata documentation for the «EMERALD» project, associated with this endeavor. Outcomes of the article include an introduction to new research materials, an illustration of the state of the Mordvin annotation pipeline, and perspectives for the further enhancement of the annotation pipeline.</abstract>
<identifier type="citekey">rueter-etal-2024-erzya</identifier>
<location>
<url>https://aclanthology.org/2024.iwclul-1.8</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>67</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Erzya and Moksha Corpora and Analyzer Development, ERME-PSLA 1950s
%A Rueter, Jack
%A Erina, Olga
%A Kabaeva, Nadezhda
%Y Hämäläinen, Mika
%Y Pirinen, Flammie
%Y Macias, Melany
%Y Crespo Avila, Mario
%S Proceedings of the 9th International Workshop on Computational Linguistics for Uralic Languages
%D 2024
%8 November
%I Association for Computational Linguistics
%C Helsinki, Finland
%F rueter-etal-2024-erzya
%X This paper describes materials and annotation facilitation pertinent to the «Erzya-Moksha Electronic Resources and Linguistic Diversity» (EMERALD) project. It addresses work following the construction of finite-state analyzers for the Mordvin languages, the gathering of test corpora, and the development of metadata strategies for descriptive research. In this paper, we provide three descriptors for a set of new Erzya and Moksha research materials at the Language Bank of Finland. The descriptors illustrate (1) a low-annotation subcorpora set of the «Electronic Resources for Moksha and Erzya» (ERME); (2) the state of the open-source analyzers used in their automatic annotation, and (3) the development of metadata documentation for the «EMERALD» project, associated with this endeavor. Outcomes of the article include an introduction to new research materials, an illustration of the state of the Mordvin annotation pipeline, and perspectives for the further enhancement of the annotation pipeline.
%U https://aclanthology.org/2024.iwclul-1.8
%P 67-75
Markdown (Informal)
[On Erzya and Moksha Corpora and Analyzer Development, ERME-PSLA 1950s](https://aclanthology.org/2024.iwclul-1.8) (Rueter et al., IWCLUL 2024)
ACL