% Workshop paper, Big Picture Workshop proceedings (record taken from aclanthology.org).
% Names use the unambiguous "Last, First" form; only proper nouns and acronyms
% in the title are brace-protected so that styles may recase the remaining words.
@inproceedings{hamalainen-etal-2023-working,
  title     = {Working Towards Digital Documentation of {Uralic} Languages With Open-Source Tools and Modern {NLP} Methods},
  author    = {H{\"a}m{\"a}l{\"a}inen, Mika and
               Rueter, Jack and
               Alnajjar, Khalid and
               Partanen, Niko},
  editor    = {Elazar, Yanai and
               Ettinger, Allyson and
               Kassner, Nora and
               Ruder, Sebastian and
               Smith, Noah A.},
  booktitle = {Proceedings of the Big Picture Workshop},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.bigpicture-1.2},
  doi       = {10.18653/v1/2023.bigpicture-1.2},
  pages     = {18--27},
  abstract  = {We present our work towards building an infrastructure for documenting endangered languages with the focus on Uralic languages in particular. Our infrastructure consists of tools to write dictionaries so that entries are structured in XML format. These dictionaries are the foundation for rule-based NLP tools such as FSTs. We also work actively towards enhancing these dictionaries and tools by using the latest state-of-the-art neural models by generating training data through rules and lexica.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hamalainen-etal-2023-working">
<titleInfo>
<title>Working Towards Digital Documentation of Uralic Languages With Open-Source Tools and Modern NLP Methods</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Rueter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Partanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Big Picture Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Allyson</namePart>
<namePart type="family">Ettinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Kassner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noah A.</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our work towards building an infrastructure for documenting endangered languages with the focus on Uralic languages in particular. Our infrastructure consists of tools to write dictionaries so that entries are structured in XML format. These dictionaries are the foundation for rule-based NLP tools such as FSTs. We also work actively towards enhancing these dictionaries and tools by using the latest state-of-the-art neural models by generating training data through rules and lexica.</abstract>
<identifier type="citekey">hamalainen-etal-2023-working</identifier>
<identifier type="doi">10.18653/v1/2023.bigpicture-1.2</identifier>
<location>
<url>https://aclanthology.org/2023.bigpicture-1.2</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>18</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Working Towards Digital Documentation of Uralic Languages With Open-Source Tools and Modern NLP Methods
%A Hämäläinen, Mika
%A Rueter, Jack
%A Alnajjar, Khalid
%A Partanen, Niko
%Y Elazar, Yanai
%Y Ettinger, Allyson
%Y Kassner, Nora
%Y Ruder, Sebastian
%Y Smith, Noah A.
%S Proceedings of the Big Picture Workshop
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F hamalainen-etal-2023-working
%X We present our work towards building an infrastructure for documenting endangered languages with the focus on Uralic languages in particular. Our infrastructure consists of tools to write dictionaries so that entries are structured in XML format. These dictionaries are the foundation for rule-based NLP tools such as FSTs. We also work actively towards enhancing these dictionaries and tools by using the latest state-of-the-art neural models by generating training data through rules and lexica.
%R 10.18653/v1/2023.bigpicture-1.2
%U https://aclanthology.org/2023.bigpicture-1.2
%U https://doi.org/10.18653/v1/2023.bigpicture-1.2
%P 18-27
Markdown (Informal)
[Working Towards Digital Documentation of Uralic Languages With Open-Source Tools and Modern NLP Methods](https://aclanthology.org/2023.bigpicture-1.2) (Hämäläinen et al., BigPicture 2023)
ACL