@inproceedings{kazantseva-etal-2024-fitting,
title = "Fitting a Square Peg into a Round Hole: Creating a {U}ni{M}orph dataset of {K}anien{'}k{\'e}ha Verbs",
author = "Kazantseva, Anna and
Martin, Akwirat{\'e}kha and
Michelson, Karin and
Koenig, Jean-Pierre",
editor = "Moeller, Sarah and
Agyapong, Godfred and
Arppe, Antti and
Chaudhary, Aditi and
Rijhwani, Shruti and
Cox, Christopher and
Henke, Ryan and
Palmer, Alexis and
Rosenblum, Daisy and
Schwartz, Lane",
booktitle = "Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.computel-1.7",
pages = "39--51",
abstract = "This paper describes efforts to annotate a dataset of verbs in the Iroquoian language Kanien{'}k{\'e}ha (a.k.a. Mohawk) using the UniMorph schema (Batsuren et al. 2022a). It is based on the output of a symbolic model - a hand-built verb conjugator. Morphological constituents of each verb are automatically annotated with UniMorph tags. Overall the process was smooth but some central features of the language did not fall neatly into the schema which resulted in a large number of custom tags and a somewhat ad hoc mapping process. We think the same difficulties are likely to arise for other Iroquoian languages and perhaps other North American language families. This paper describes our decision making process with respect to Kanien{'}k{\'e}ha and reports preliminary results of morphological induction experiments using the dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kazantseva-etal-2024-fitting">
<titleInfo>
<title>Fitting a Square Peg into a Round Hole: Creating a UniMorph dataset of Kanien’kéha Verbs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akwiratékha</namePart>
<namePart type="family">Martin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">Michelson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Pierre</namePart>
<namePart type="family">Koenig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Godfred</namePart>
<namePart type="family">Agyapong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antti</namePart>
<namePart type="family">Arppe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditi</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cox</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Henke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daisy</namePart>
<namePart type="family">Rosenblum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lane</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes efforts to annotate a dataset of verbs in the Iroquoian language Kanien’kéha (a.k.a. Mohawk) using the UniMorph schema (Batsuren et al. 2022a). It is based on the output of a symbolic model - a hand-built verb conjugator. Morphological constituents of each verb are automatically annotated with UniMorph tags. Overall the process was smooth but some central features of the language did not fall neatly into the schema which resulted in a large number of custom tags and a somewhat ad hoc mapping process. We think the same difficulties are likely to arise for other Iroquoian languages and perhaps other North American language families. This paper describes our decision making process with respect to Kanien’kéha and reports preliminary results of morphological induction experiments using the dataset.</abstract>
<identifier type="citekey">kazantseva-etal-2024-fitting</identifier>
<location>
<url>https://aclanthology.org/2024.computel-1.7</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>39</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fitting a Square Peg into a Round Hole: Creating a UniMorph dataset of Kanien’kéha Verbs
%A Kazantseva, Anna
%A Martin, Akwiratékha
%A Michelson, Karin
%A Koenig, Jean-Pierre
%Y Moeller, Sarah
%Y Agyapong, Godfred
%Y Arppe, Antti
%Y Chaudhary, Aditi
%Y Rijhwani, Shruti
%Y Cox, Christopher
%Y Henke, Ryan
%Y Palmer, Alexis
%Y Rosenblum, Daisy
%Y Schwartz, Lane
%S Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F kazantseva-etal-2024-fitting
%X This paper describes efforts to annotate a dataset of verbs in the Iroquoian language Kanien’kéha (a.k.a. Mohawk) using the UniMorph schema (Batsuren et al. 2022a). It is based on the output of a symbolic model - a hand-built verb conjugator. Morphological constituents of each verb are automatically annotated with UniMorph tags. Overall the process was smooth but some central features of the language did not fall neatly into the schema which resulted in a large number of custom tags and a somewhat ad hoc mapping process. We think the same difficulties are likely to arise for other Iroquoian languages and perhaps other North American language families. This paper describes our decision making process with respect to Kanien’kéha and reports preliminary results of morphological induction experiments using the dataset.
%U https://aclanthology.org/2024.computel-1.7
%P 39-51
Markdown (Informal)
[Fitting a Square Peg into a Round Hole: Creating a UniMorph dataset of Kanien’kéha Verbs](https://aclanthology.org/2024.computel-1.7) (Kazantseva et al., ComputEL-WS 2024)
ACL