@inproceedings{kosyak-tyers-2022-predictive,
title = "Predictive Text for Agglutinative and Polysynthetic Languages",
author = "Kosyak, Sergey and
Tyers, Francis",
editor = "Serikov, Oleg and
Voloshina, Ekaterina and
Postnikova, Anna and
Klyachko, Elena and
Neminova, Ekaterina and
Vylomova, Ekaterina and
Shavrina, Tatiana and
Ferrand, Eric Le and
Malykh, Valentin and
Tyers, Francis and
Arkhangelskiy, Timofey and
Mikhailov, Vladislav and
Fenogenova, Alena",
booktitle = "Proceedings of the first workshop on NLP applications to field linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Conference on Computational Linguistics",
url = "https://aclanthology.org/2022.fieldmatters-1.9",
pages = "77--85",
abstract = "This paper presents a set of experiments in the area of morphological modelling and prediction. We test whether morphological segmentation can compete against statistical segmentation in the tasks of language modelling and predictive text entry for two under-resourced and indigenous languages, K{'}iche{'} and Chukchi. We use different segmentation methods {---} both statistical and morphological {---} to make datasets that are used to train models of different types: single-way segmented, which are trained using data from one segmenter; two-way segmented, which are trained using concatenated data from two segmenters; and finetuned, which are trained on two datasets from different segmenters. We compute word and character level perplexities and find that single-way segmented models trained on morphologically segmented data show the highest performance. Finally, we evaluate the language models on the task of predictive text entry using gold standard data and measure the average number of clicks per character and keystroke savings rate. We find that the models trained on morphologically segmented data show better scores, although with substantial room for improvement. At last, we propose the usage of morphological segmentation in order to improve the end-user experience while using predictive text and we plan on testing this assumption by doing end-user evaluation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kosyak-tyers-2022-predictive">
<titleInfo>
<title>Predictive Text for Agglutinative and Polysynthetic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sergey</namePart>
<namePart type="family">Kosyak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first workshop on NLP applications to field linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Postnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Neminova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="given">Le</namePart>
<namePart type="family">Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Tyers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timofey</namePart>
<namePart type="family">Arkhangelskiy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladislav</namePart>
<namePart type="family">Mikhailov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alena</namePart>
<namePart type="family">Fenogenova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a set of experiments in the area of morphological modelling and prediction. We test whether morphological segmentation can compete against statistical segmentation in the tasks of language modelling and predictive text entry for two under-resourced and indigenous languages, K’iche’ and Chukchi. We use different segmentation methods — both statistical and morphological — to make datasets that are used to train models of different types: single-way segmented, which are trained using data from one segmenter; two-way segmented, which are trained using concatenated data from two segmenters; and finetuned, which are trained on two datasets from different segmenters. We compute word and character level perplexities and find that single-way segmented models trained on morphologically segmented data show the highest performance. Finally, we evaluate the language models on the task of predictive text entry using gold standard data and measure the average number of clicks per character and keystroke savings rate. We find that the models trained on morphologically segmented data show better scores, although with substantial room for improvement. At last, we propose the usage of morphological segmentation in order to improve the end-user experience while using predictive text and we plan on testing this assumption by doing end-user evaluation.</abstract>
<identifier type="citekey">kosyak-tyers-2022-predictive</identifier>
<location>
<url>https://aclanthology.org/2022.fieldmatters-1.9</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>77</start>
<end>85</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Predictive Text for Agglutinative and Polysynthetic Languages
%A Kosyak, Sergey
%A Tyers, Francis
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Postnikova, Anna
%Y Klyachko, Elena
%Y Neminova, Ekaterina
%Y Vylomova, Ekaterina
%Y Shavrina, Tatiana
%Y Ferrand, Eric Le
%Y Malykh, Valentin
%Y Tyers, Francis
%Y Arkhangelskiy, Timofey
%Y Mikhailov, Vladislav
%Y Fenogenova, Alena
%S Proceedings of the first workshop on NLP applications to field linguistics
%D 2022
%8 October
%I International Conference on Computational Linguistics
%C Gyeongju, Republic of Korea
%F kosyak-tyers-2022-predictive
%X This paper presents a set of experiments in the area of morphological modelling and prediction. We test whether morphological segmentation can compete against statistical segmentation in the tasks of language modelling and predictive text entry for two under-resourced and indigenous languages, K’iche’ and Chukchi. We use different segmentation methods — both statistical and morphological — to make datasets that are used to train models of different types: single-way segmented, which are trained using data from one segmenter; two-way segmented, which are trained using concatenated data from two segmenters; and finetuned, which are trained on two datasets from different segmenters. We compute word and character level perplexities and find that single-way segmented models trained on morphologically segmented data show the highest performance. Finally, we evaluate the language models on the task of predictive text entry using gold standard data and measure the average number of clicks per character and keystroke savings rate. We find that the models trained on morphologically segmented data show better scores, although with substantial room for improvement. At last, we propose the usage of morphological segmentation in order to improve the end-user experience while using predictive text and we plan on testing this assumption by doing end-user evaluation.
%U https://aclanthology.org/2022.fieldmatters-1.9
%P 77-85
Markdown (Informal)
[Predictive Text for Agglutinative and Polysynthetic Languages](https://aclanthology.org/2022.fieldmatters-1.9) (Kosyak & Tyers, FieldMatters 2022)
ACL