@inproceedings{maehlum-ivanova-2023-phonotactics,
title = "Phonotactics as an Aid in Low Resource Loan Word Detection and Morphological Analysis in Sakha",
author = "M{\ae}hlum, Petter and
Ivanova, Sardana",
editor = "Ilinykh, Nikolai and
Morger, Felix and
Dann{\'e}lls, Dana and
Dobnik, Simon and
Megyesi, Be{\'a}ta and
Nivre, Joakim",
booktitle = "Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)",
month = may,
year = "2023",
address = "T{\'o}rshavn, the Faroe Islands",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.resourceful-1.14",
pages = "111--120",
abstract = "Obtaining information about loan words and irregular morphological patterns can be difficult for low-resource languages. Using Sakha as an example, we show that it is possible to exploit known phonemic regularities such as vowel harmony and consonant distributions to identify loan words and irregular patterns, which can be helpful in rule-based downstream tasks such as parsing and POS-tagging. We evaluate phonemically inspired methods for loanword detection, combined with bi-gram vowel transition probabilities to inspect irregularities in the morphology of loanwords. We show that both these techniques can be useful for the detection of such patterns. Finally, we inspect the plural suffix -ЛАр [-LAr] to observe some of the variation in morphology between native and foreign words.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maehlum-ivanova-2023-phonotactics">
<titleInfo>
<title>Phonotactics as an Aid in Low Resource Loan Word Detection and Morphological Analysis in Sakha</title>
</titleInfo>
<name type="personal">
<namePart type="given">Petter</namePart>
<namePart type="family">Mæhlum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sardana</namePart>
<namePart type="family">Ivanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Morger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Dannélls</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beáta</namePart>
<namePart type="family">Megyesi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tórshavn, the Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Obtaining information about loan words and irregular morphological patterns can be difficult for low-resource languages. Using Sakha as an example, we show that it is possible to exploit known phonemic regularities such as vowel harmony and consonant distributions to identify loan words and irregular patterns, which can be helpful in rule-based downstream tasks such as parsing and POS-tagging. We evaluate phonemically inspired methods for loanword detection, combined with bi-gram vowel transition probabilities to inspect irregularities in the morphology of loanwords. We show that both these techniques can be useful for the detection of such patterns. Finally, we inspect the plural suffix -ЛАр [-LAr] to observe some of the variation in morphology between native and foreign words.</abstract>
<identifier type="citekey">maehlum-ivanova-2023-phonotactics</identifier>
<location>
<url>https://aclanthology.org/2023.resourceful-1.14</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>111</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Phonotactics as an Aid in Low Resource Loan Word Detection and Morphological Analysis in Sakha
%A Mæhlum, Petter
%A Ivanova, Sardana
%Y Ilinykh, Nikolai
%Y Morger, Felix
%Y Dannélls, Dana
%Y Dobnik, Simon
%Y Megyesi, Beáta
%Y Nivre, Joakim
%S Proceedings of the Second Workshop on Resources and Representations for Under-Resourced Languages and Domains (RESOURCEFUL-2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Tórshavn, the Faroe Islands
%F maehlum-ivanova-2023-phonotactics
%X Obtaining information about loan words and irregular morphological patterns can be difficult for low-resource languages. Using Sakha as an example, we show that it is possible to exploit known phonemic regularities such as vowel harmony and consonant distributions to identify loan words and irregular patterns, which can be helpful in rule-based downstream tasks such as parsing and POS-tagging. We evaluate phonemically inspired methods for loanword detection, combined with bi-gram vowel transition probabilities to inspect irregularities in the morphology of loanwords. We show that both these techniques can be useful for the detection of such patterns. Finally, we inspect the plural suffix -ЛАр [-LAr] to observe some of the variation in morphology between native and foreign words.
%U https://aclanthology.org/2023.resourceful-1.14
%P 111-120
Markdown (Informal)
[Phonotactics as an Aid in Low Resource Loan Word Detection and Morphological Analysis in Sakha](https://aclanthology.org/2023.resourceful-1.14) (Mæhlum & Ivanova, RESOURCEFUL 2023)
ACL