@inproceedings{peters-martins-2022-beyond,
title = "Beyond Characters: Subword-level Morpheme Segmentation",
author = "Peters, Ben and
Martins, Andre F. T.",
editor = "Nicolai, Garrett and
Chodroff, Eleanor",
booktitle = "Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sigmorphon-1.14",
doi = "10.18653/v1/2022.sigmorphon-1.14",
pages = "131--138",
abstract = "This paper presents DeepSPIN{'}s submissions to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation. We make three submissions, all to the word-level subtask. First, we show that entmax-based sparse sequence-to-sequence models deliver large improvements over conventional softmax-based models, echoing results from other tasks. Then, we challenge the assumption that models for morphological tasks should be trained at the character level by building a transformer that generates morphemes as sequences of unigram language model-induced subwords. This subword transformer outperforms all of our character-level models and wins the word-level subtask. Although we do not submit an official submission to the sentence-level subtask, we show that this subword-based approach is highly effective there as well.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="peters-martins-2022-beyond">
<titleInfo>
<title>Beyond Characters: Subword-level Morpheme Segmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Peters</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="given">F</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Garrett</namePart>
<namePart type="family">Nicolai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleanor</namePart>
<namePart type="family">Chodroff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents DeepSPIN’s submissions to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation. We make three submissions, all to the word-level subtask. First, we show that entmax-based sparse sequence-to-sequence models deliver large improvements over conventional softmax-based models, echoing results from other tasks. Then, we challenge the assumption that models for morphological tasks should be trained at the character level by building a transformer that generates morphemes as sequences of unigram language model-induced subwords. This subword transformer outperforms all of our character-level models and wins the word-level subtask. Although we do not submit an official submission to the sentence-level subtask, we show that this subword-based approach is highly effective there as well.</abstract>
<identifier type="citekey">peters-martins-2022-beyond</identifier>
<identifier type="doi">10.18653/v1/2022.sigmorphon-1.14</identifier>
<location>
<url>https://aclanthology.org/2022.sigmorphon-1.14</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>131</start>
<end>138</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Characters: Subword-level Morpheme Segmentation
%A Peters, Ben
%A Martins, Andre F. T.
%Y Nicolai, Garrett
%Y Chodroff, Eleanor
%S Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F peters-martins-2022-beyond
%X This paper presents DeepSPIN’s submissions to the SIGMORPHON 2022 Shared Task on Morpheme Segmentation. We make three submissions, all to the word-level subtask. First, we show that entmax-based sparse sequence-to-sequence models deliver large improvements over conventional softmax-based models, echoing results from other tasks. Then, we challenge the assumption that models for morphological tasks should be trained at the character level by building a transformer that generates morphemes as sequences of unigram language model-induced subwords. This subword transformer outperforms all of our character-level models and wins the word-level subtask. Although we do not submit an official submission to the sentence-level subtask, we show that this subword-based approach is highly effective there as well.
%R 10.18653/v1/2022.sigmorphon-1.14
%U https://aclanthology.org/2022.sigmorphon-1.14
%U https://doi.org/10.18653/v1/2022.sigmorphon-1.14
%P 131-138
Markdown (Informal)
[Beyond Characters: Subword-level Morpheme Segmentation](https://aclanthology.org/2022.sigmorphon-1.14) (Peters & Martins, SIGMORPHON 2022)
ACL
- Ben Peters and Andre F. T. Martins. 2022. Beyond Characters: Subword-level Morpheme Segmentation. In Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology, pages 131–138, Seattle, Washington. Association for Computational Linguistics.