@inproceedings{abudouwaili-etal-2023-strategies,
title = "Strategies to Improve Low-Resource Agglutinative Languages Morphological Inflection",
author = "Abudouwaili, Gulinigeer and
Ablez, Wayit and
Abiderexiti, Kahaerjiang and
Wumaier, Aishan and
Yi, Nian",
editor = "Jiang, Jing and
Reitter, David and
Deng, Shumin",
booktitle = "Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.conll-1.34",
doi = "10.18653/v1/2023.conll-1.34",
pages = "508--520",
abstract = "Morphological inflection is a crucial task in the field of morphology and is typically considered a sequence transduction task. In recent years, it has received substantial attention from researchers and made significant progress. Models have achieved impressive performance levels for both high- and low-resource languages. However, when the distribution of instances in the training dataset changes, or novel lemma or feature labels are predicted, the model{'}s accuracy declines. In agglutinative languages, morphological inflection involves phonological phenomena while generating new words, which can alter the syllable patterns at the boundary between the lemma and the suffixes. This paper proposes four strategies for low-resource agglutinative languages to enhance the model{'}s generalization ability. Firstly, a convolution module extracts syllable-like units from lemmas, allowing the model to learn syllable features. Secondly, the lemma and feature labels are represented separately in the input, and the position encoding of the feature labels is marked so that the model learns the order between suffixes and labels. Thirdly, the model recognizes the common substrings in lemmas through two special characters and copies them into words. Finally, combined with syllable features, we improve the data augmentation method. A series of experiments show that the proposed model in this paper is superior to other baseline models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abudouwaili-etal-2023-strategies">
<titleInfo>
<title>Strategies to Improve Low-Resource Agglutinative Languages Morphological Inflection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gulinigeer</namePart>
<namePart type="family">Abudouwaili</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wayit</namePart>
<namePart type="family">Ablez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kahaerjiang</namePart>
<namePart type="family">Abiderexiti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aishan</namePart>
<namePart type="family">Wumaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nian</namePart>
<namePart type="family">Yi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Reitter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shumin</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Morphological inflection is a crucial task in the field of morphology and is typically considered a sequence transduction task. In recent years, it has received substantial attention from researchers and made significant progress. Models have achieved impressive performance levels for both high- and low-resource languages. However, when the distribution of instances in the training dataset changes, or novel lemma or feature labels are predicted, the model’s accuracy declines. In agglutinative languages, morphological inflection involves phonological phenomena while generating new words, which can alter the syllable patterns at the boundary between the lemma and the suffixes. This paper proposes four strategies for low-resource agglutinative languages to enhance the model’s generalization ability. Firstly, a convolution module extracts syllable-like units from lemmas, allowing the model to learn syllable features. Secondly, the lemma and feature labels are represented separately in the input, and the position encoding of the feature labels is marked so that the model learns the order between suffixes and labels. Thirdly, the model recognizes the common substrings in lemmas through two special characters and copies them into words. Finally, combined with syllable features, we improve the data augmentation method. A series of experiments show that the proposed model in this paper is superior to other baseline models.</abstract>
<identifier type="citekey">abudouwaili-etal-2023-strategies</identifier>
<identifier type="doi">10.18653/v1/2023.conll-1.34</identifier>
<location>
<url>https://aclanthology.org/2023.conll-1.34</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>508</start>
<end>520</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Strategies to Improve Low-Resource Agglutinative Languages Morphological Inflection
%A Abudouwaili, Gulinigeer
%A Ablez, Wayit
%A Abiderexiti, Kahaerjiang
%A Wumaier, Aishan
%A Yi, Nian
%Y Jiang, Jing
%Y Reitter, David
%Y Deng, Shumin
%S Proceedings of the 27th Conference on Computational Natural Language Learning (CoNLL)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F abudouwaili-etal-2023-strategies
%X Morphological inflection is a crucial task in the field of morphology and is typically considered a sequence transduction task. In recent years, it has received substantial attention from researchers and made significant progress. Models have achieved impressive performance levels for both high- and low-resource languages. However, when the distribution of instances in the training dataset changes, or novel lemma or feature labels are predicted, the model’s accuracy declines. In agglutinative languages, morphological inflection involves phonological phenomena while generating new words, which can alter the syllable patterns at the boundary between the lemma and the suffixes. This paper proposes four strategies for low-resource agglutinative languages to enhance the model’s generalization ability. Firstly, a convolution module extracts syllable-like units from lemmas, allowing the model to learn syllable features. Secondly, the lemma and feature labels are represented separately in the input, and the position encoding of the feature labels is marked so that the model learns the order between suffixes and labels. Thirdly, the model recognizes the common substrings in lemmas through two special characters and copies them into words. Finally, combined with syllable features, we improve the data augmentation method. A series of experiments show that the proposed model in this paper is superior to other baseline models.
%R 10.18653/v1/2023.conll-1.34
%U https://aclanthology.org/2023.conll-1.34
%U https://doi.org/10.18653/v1/2023.conll-1.34
%P 508-520
Markdown (Informal)
[Strategies to Improve Low-Resource Agglutinative Languages Morphological Inflection](https://aclanthology.org/2023.conll-1.34) (Abudouwaili et al., CoNLL 2023)
ACL