@inproceedings{ventayol-boada-etal-2023-unsupervised,
title = "Unsupervised part-of-speech induction for language description: Modeling documentation materials in {K}olyma {Y}ukaghir",
author = "Ventayol-Boada, Albert and
Roll, Nathan and
Todd, Simon",
editor = "Serikov, Oleg and
Voloshina, Ekaterina and
Postnikova, Anna and
Klyachko, Elena and
Vylomova, Ekaterina and
Shavrina, Tatiana and
Le Ferrand, Eric and
Malykh, Valentin and
Tyers, Francis and
Arkhangelskiy, Timofey and
Mikhailov, Vladislav",
booktitle = "Proceedings of the Second Workshop on NLP Applications to Field Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.fieldmatters-1.3",
doi = "10.18653/v1/2023.fieldmatters-1.3",
pages = "23--29",
abstract = "This study investigates the clustering of words into Part-of-Speech (POS) classes in Kolyma Yukaghir. In grammatical descriptions, lexical items are assigned to POS classes based on their morphological paradigms. Discursively, however, these classes share a fair amount of morphology. In this study, we turn to POS induction to evaluate if classes based on quantification of the distributions in which roots and affixes are used can be useful for language description purposes, and, if so, what those classes might be. We qualitatively compare clusters of roots and affixes based on four different definitions of their distributions. The results show that clustering is more reliable for words that typically bear more morphology. Additionally, the results suggest that the number of POS classes in Kolyma Yukaghir might be smaller than stated in current descriptions. This study thus demonstrates how unsupervised learning methods can provide insights for language description, particularly for highly inflectional languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ventayol-boada-etal-2023-unsupervised">
<titleInfo>
<title>Unsupervised part-of-speech induction for language description: Modeling documentation materials in Kolyma Yukaghir</title>
</titleInfo>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Ventayol-Boada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathan</namePart>
<namePart type="family">Roll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Todd</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on NLP Applications to Field Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Postnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timofey</namePart>
<namePart type="family">Arkhangelskiy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Mikhailov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study investigates the clustering of words into Part-of-Speech (POS) classes in Kolyma Yukaghir. In grammatical descriptions, lexical items are assigned to POS classes based on their morphological paradigms. Discursively, however, these classes share a fair amount of morphology. In this study, we turn to POS induction to evaluate if classes based on quantification of the distributions in which roots and affixes are used can be useful for language description purposes, and, if so, what those classes might be. We qualitatively compare clusters of roots and affixes based on four different definitions of their distributions. The results show that clustering is more reliable for words that typically bear more morphology. Additionally, the results suggest that the number of POS classes in Kolyma Yukaghir might be smaller than stated in current descriptions. This study thus demonstrates how unsupervised learning methods can provide insights for language description, particularly for highly inflectional languages.</abstract>
<identifier type="citekey">ventayol-boada-etal-2023-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2023.fieldmatters-1.3</identifier>
<location>
<url>https://aclanthology.org/2023.fieldmatters-1.3</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>23</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised part-of-speech induction for language description: Modeling documentation materials in Kolyma Yukaghir
%A Ventayol-Boada, Albert
%A Roll, Nathan
%A Todd, Simon
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Postnikova, Anna
%Y Klyachko, Elena
%Y Vylomova, Ekaterina
%Y Shavrina, Tatiana
%Y Le Ferrand, Eric
%Y Malykh, Valentin
%Y Tyers, Francis
%Y Arkhangelskiy, Timofey
%Y Mikhailov, Vladislav
%S Proceedings of the Second Workshop on NLP Applications to Field Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F ventayol-boada-etal-2023-unsupervised
%X This study investigates the clustering of words into Part-of-Speech (POS) classes in Kolyma Yukaghir. In grammatical descriptions, lexical items are assigned to POS classes based on their morphological paradigms. Discursively, however, these classes share a fair amount of morphology. In this study, we turn to POS induction to evaluate if classes based on quantification of the distributions in which roots and affixes are used can be useful for language description purposes, and, if so, what those classes might be. We qualitatively compare clusters of roots and affixes based on four different definitions of their distributions. The results show that clustering is more reliable for words that typically bear more morphology. Additionally, the results suggest that the number of POS classes in Kolyma Yukaghir might be smaller than stated in current descriptions. This study thus demonstrates how unsupervised learning methods can provide insights for language description, particularly for highly inflectional languages.
%R 10.18653/v1/2023.fieldmatters-1.3
%U https://aclanthology.org/2023.fieldmatters-1.3
%U https://doi.org/10.18653/v1/2023.fieldmatters-1.3
%P 23-29
Markdown (Informal)
[Unsupervised part-of-speech induction for language description: Modeling documentation materials in Kolyma Yukaghir](https://aclanthology.org/2023.fieldmatters-1.3) (Ventayol-Boada et al., FieldMatters 2023)
ACL