@inproceedings{peters-martins-2020-one,
    title = "One-Size-Fits-All Multilingual Models",
    author = "Peters, Ben and
      Martins, Andr{\'e} F. T.",
    editor = "Nicolai, Garrett and
      Gorman, Kyle and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology",
    month = jul,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.sigmorphon-1.4",
    doi = "10.18653/v1/2020.sigmorphon-1.4",
    pages = "63--69",
    abstract = "This paper presents DeepSPIN{'}s submissions to Tasks 0 and 1 of the SIGMORPHON 2020 Shared Task. For both tasks, we present multilingual models, training jointly on data in all languages. We perform no language-specific hyperparameter tuning {--} each of our submissions uses the same model for all languages. Our basic architecture is the sparse sequence-to-sequence model with entmax attention and loss, which allows our models to learn sparse, local alignments while still being trainable with gradient-based techniques. For Task 1, we achieve strong performance with both RNN- and transformer-based sparse models. For Task 0, we extend our RNN-based model to a multi-encoder set-up in which separate modules encode the lemma and inflection sequences. Despite our models{'} lack of language-specific tuning, they tie for first in Task 0 and place third in Task 1.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="peters-martins-2020-one">
    <titleInfo>
      <title>One-Size-Fits-All Multilingual Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ben</namePart>
      <namePart type="family">Peters</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">André</namePart>
      <namePart type="given">F</namePart>
      <namePart type="given">T</namePart>
      <namePart type="family">Martins</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Garrett</namePart>
        <namePart type="family">Nicolai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kyle</namePart>
        <namePart type="family">Gorman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">Cotterell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents DeepSPIN’s submissions to Tasks 0 and 1 of the SIGMORPHON 2020 Shared Task. For both tasks, we present multilingual models, training jointly on data in all languages. We perform no language-specific hyperparameter tuning – each of our submissions uses the same model for all languages. Our basic architecture is the sparse sequence-to-sequence model with entmax attention and loss, which allows our models to learn sparse, local alignments while still being trainable with gradient-based techniques. For Task 1, we achieve strong performance with both RNN- and transformer-based sparse models. For Task 0, we extend our RNN-based model to a multi-encoder set-up in which separate modules encode the lemma and inflection sequences. Despite our models’ lack of language-specific tuning, they tie for first in Task 0 and place third in Task 1.</abstract>
    <identifier type="citekey">peters-martins-2020-one</identifier>
    <identifier type="doi">10.18653/v1/2020.sigmorphon-1.4</identifier>
    <location>
      <url>https://aclanthology.org/2020.sigmorphon-1.4</url>
    </location>
    <part>
      <date>2020-07</date>
      <extent unit="page">
        <start>63</start>
        <end>69</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T One-Size-Fits-All Multilingual Models
%A Peters, Ben
%A Martins, André F. T.
%Y Nicolai, Garrett
%Y Gorman, Kyle
%Y Cotterell, Ryan
%S Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F peters-martins-2020-one
%X This paper presents DeepSPIN’s submissions to Tasks 0 and 1 of the SIGMORPHON 2020 Shared Task. For both tasks, we present multilingual models, training jointly on data in all languages. We perform no language-specific hyperparameter tuning – each of our submissions uses the same model for all languages. Our basic architecture is the sparse sequence-to-sequence model with entmax attention and loss, which allows our models to learn sparse, local alignments while still being trainable with gradient-based techniques. For Task 1, we achieve strong performance with both RNN- and transformer-based sparse models. For Task 0, we extend our RNN-based model to a multi-encoder set-up in which separate modules encode the lemma and inflection sequences. Despite our models’ lack of language-specific tuning, they tie for first in Task 0 and place third in Task 1.
%R 10.18653/v1/2020.sigmorphon-1.4
%U https://aclanthology.org/2020.sigmorphon-1.4
%U https://doi.org/10.18653/v1/2020.sigmorphon-1.4
%P 63-69

Markdown (Informal)
[One-Size-Fits-All Multilingual Models](https://aclanthology.org/2020.sigmorphon-1.4) (Peters & Martins, SIGMORPHON 2020)

ACL
Ben Peters and André F. T. Martins. 2020. One-Size-Fits-All Multilingual Models. In Proceedings of the 17th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology, pages 63–69, Online. Association for Computational Linguistics.
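
The abstract above refers to entmax attention and loss, which replace softmax with a mapping that can assign exactly zero weight to low-scoring positions, yielding sparse, local alignments. A minimal sketch of that mechanism, assuming PyTorch and the open-source `entmax` package (`pip install entmax`); this is an illustration of the idea only, not the authors' SIGMORPHON 2020 submission code:

```python
# Sketch: entmax-1.5 attention weights vs. dense softmax.
# Assumes PyTorch and the `entmax` package; illustrative only.
import torch
from entmax import entmax15

# Attention scores for one decoder query over four source positions.
scores = torch.tensor([[2.0, 1.0, 0.2, -1.5]])

dense = torch.softmax(scores, dim=-1)  # softmax: every position gets nonzero weight
sparse = entmax15(scores, dim=-1)      # entmax-1.5: low-scoring positions get exactly 0

print(dense)   # all four weights are strictly positive
print(sparse)  # trailing weights are exactly 0.0, i.e. a sparse alignment
```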