@inproceedings{rouhe-etal-2022-morfessor,
title = "{M}orfessor-enriched features and multilingual training for canonical morphological segmentation",
author = {Rouhe, Aku and
Gr{\"o}nroos, Stig-Arne and
Virpioja, Sami and
Creutz, Mathias and
Kurimo, Mikko},
editor = "Nicolai, Garrett and
Chodroff, Eleanor",
booktitle = "Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sigmorphon-1.16",
doi = "10.18653/v1/2022.sigmorphon-1.16",
pages = "144--151",
abstract = "In our submission to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation, we study whether an unsupervised morphological segmentation method, Morfessor, can help in a supervised setting. Previous research has shown the effectiveness of the approach in semisupervised settings with small amounts of labeled data. The current tasks vary in data size: the amount of word-level annotated training data is much larger, but the amount of sentencelevel annotated training data remains small. Our approach is to pre-segment the input data for a neural sequence-to-sequence model with the unsupervised method. As the unsupervised method can be trained with raw text data, we use Wikipedia to increase the amount of training data. In addition, we train multilingual models for the sentence-level task. The results for the Morfessor-enriched features are mixed, showing benefit for all three sentencelevel tasks but only some of the word-level tasks. The multilingual training yields considerable improvements over the monolingual sentence-level models, but it negates the effect of the enriched features.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rouhe-etal-2022-morfessor">
<titleInfo>
<title>Morfessor-enriched features and multilingual training for canonical morphological segmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aku</namePart>
<namePart type="family">Rouhe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stig-Arne</namePart>
<namePart type="family">Grönroos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sami</namePart>
<namePart type="family">Virpioja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Creutz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikko</namePart>
<namePart type="family">Kurimo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleanor</namePart>
<namePart type="family">Chodroff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In our submission to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation, we study whether an unsupervised morphological segmentation method, Morfessor, can help in a supervised setting. Previous research has shown the effectiveness of the approach in semisupervised settings with small amounts of labeled data. The current tasks vary in data size: the amount of word-level annotated training data is much larger, but the amount of sentencelevel annotated training data remains small. Our approach is to pre-segment the input data for a neural sequence-to-sequence model with the unsupervised method. As the unsupervised method can be trained with raw text data, we use Wikipedia to increase the amount of training data. In addition, we train multilingual models for the sentence-level task. The results for the Morfessor-enriched features are mixed, showing benefit for all three sentencelevel tasks but only some of the word-level tasks. The multilingual training yields considerable improvements over the monolingual sentence-level models, but it negates the effect of the enriched features.</abstract>
<identifier type="citekey">rouhe-etal-2022-morfessor</identifier>
<identifier type="doi">10.18653/v1/2022.sigmorphon-1.16</identifier>
<location>
<url>https://aclanthology.org/2022.sigmorphon-1.16</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>144</start>
<end>151</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Morfessor-enriched features and multilingual training for canonical morphological segmentation
%A Rouhe, Aku
%A Grönroos, Stig-Arne
%A Virpioja, Sami
%A Creutz, Mathias
%A Kurimo, Mikko
%Y Nicolai, Garrett
%Y Chodroff, Eleanor
%S Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F rouhe-etal-2022-morfessor
%X In our submission to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation, we study whether an unsupervised morphological segmentation method, Morfessor, can help in a supervised setting. Previous research has shown the effectiveness of the approach in semisupervised settings with small amounts of labeled data. The current tasks vary in data size: the amount of word-level annotated training data is much larger, but the amount of sentencelevel annotated training data remains small. Our approach is to pre-segment the input data for a neural sequence-to-sequence model with the unsupervised method. As the unsupervised method can be trained with raw text data, we use Wikipedia to increase the amount of training data. In addition, we train multilingual models for the sentence-level task. The results for the Morfessor-enriched features are mixed, showing benefit for all three sentencelevel tasks but only some of the word-level tasks. The multilingual training yields considerable improvements over the monolingual sentence-level models, but it negates the effect of the enriched features.
%R 10.18653/v1/2022.sigmorphon-1.16
%U https://aclanthology.org/2022.sigmorphon-1.16
%U https://doi.org/10.18653/v1/2022.sigmorphon-1.16
%P 144-151
Markdown (Informal)
[Morfessor-enriched features and multilingual training for canonical morphological segmentation](https://aclanthology.org/2022.sigmorphon-1.16) (Rouhe et al., SIGMORPHON 2022)
ACL