@inproceedings{jude-ogundepo-etal-2022-afriteva,
    title = "{A}fri{T}e{VA}: Extending {``}Small Data{''} Pretraining Approaches to Sequence-to-Sequence Models",
author = "Jude Ogundepo, Odunayo and
Oladipo, Akintunde and
Adeyemi, Mofetoluwa and
Ogueji, Kelechi and
Lin, Jimmy",
editor = "Cherry, Colin and
Fan, Angela and
Foster, George and
Haffari, Gholamreza (Reza) and
Khadivi, Shahram and
Peng, Nanyun (Violet) and
Ren, Xiang and
Shareghi, Ehsan and
Swayamdipta, Swabha",
booktitle = "Proceedings of the Third Workshop on Deep Learning for Low-Resource Natural Language Processing",
month = jul,
year = "2022",
address = "Hybrid",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.deeplo-1.14",
doi = "10.18653/v1/2022.deeplo-1.14",
pages = "126--135",
    abstract = "Pretrained language models represent the state of the art in NLP, but the successful construction of such models often requires large amounts of data and computational resources. Thus, the paucity of data for low-resource languages impedes the development of robust NLP capabilities for these languages. There has been some recent success in pretraining encoder-only models solely on a combination of low-resource African languages, exemplified by AfriBERTa. In this work, we extend the approach of {``}small data{''} pretraining to encoder{--}decoder models. We introduce AfriTeVa, a family of sequence-to-sequence models derived from T5 that are pretrained on 10 African languages from scratch. With a pretraining corpus of only around 1GB, we show that it is possible to achieve competitive downstream effectiveness for machine translation and text classification, compared to larger models trained on much more data. All the code and model checkpoints described in this work are publicly available at \url{https://github.com/castorini/afriteva}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jude-ogundepo-etal-2022-afriteva">
<titleInfo>
<title>AfriTeVA: Extending “Small Data” Pretraining Approaches to Sequence-to-Sequence Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Odunayo</namePart>
<namePart type="family">Jude Ogundepo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akintunde</namePart>
<namePart type="family">Oladipo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mofetoluwa</namePart>
<namePart type="family">Adeyemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kelechi</namePart>
<namePart type="family">Ogueji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimmy</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Deep Learning for Low-Resource Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Foster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gholamreza</namePart>
<namePart type="given">(Reza)</namePart>
<namePart type="family">Haffari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahram</namePart>
<namePart type="family">Khadivi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nanyun</namePart>
<namePart type="given">(Violet)</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehsan</namePart>
<namePart type="family">Shareghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swabha</namePart>
<namePart type="family">Swayamdipta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pretrained language models represent the state of the art in NLP, but the successful construction of such models often requires large amounts of data and computational resources. Thus, the paucity of data for low-resource languages impedes the development of robust NLP capabilities for these languages. There has been some recent success in pretraining encoder-only models solely on a combination of low-resource African languages, exemplified by AfriBERTa. In this work, we extend the approach of “small data” pretraining to encoder–decoder models. We introduce AfriTeVa, a family of sequence-to-sequence models derived from T5 that are pretrained on 10 African languages from scratch. With a pretraining corpus of only around 1GB, we show that it is possible to achieve competitive downstream effectiveness for machine translation and text classification, compared to larger models trained on much more data. All the code and model checkpoints described in this work are publicly available at https://github.com/castorini/afriteva.</abstract>
<identifier type="citekey">jude-ogundepo-etal-2022-afriteva</identifier>
<identifier type="doi">10.18653/v1/2022.deeplo-1.14</identifier>
<location>
<url>https://aclanthology.org/2022.deeplo-1.14</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>126</start>
<end>135</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AfriTeVA: Extending “Small Data” Pretraining Approaches to Sequence-to-Sequence Models
%A Jude Ogundepo, Odunayo
%A Oladipo, Akintunde
%A Adeyemi, Mofetoluwa
%A Ogueji, Kelechi
%A Lin, Jimmy
%Y Cherry, Colin
%Y Fan, Angela
%Y Foster, George
%Y Haffari, Gholamreza (Reza)
%Y Khadivi, Shahram
%Y Peng, Nanyun (Violet)
%Y Ren, Xiang
%Y Shareghi, Ehsan
%Y Swayamdipta, Swabha
%S Proceedings of the Third Workshop on Deep Learning for Low-Resource Natural Language Processing
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid
%F jude-ogundepo-etal-2022-afriteva
%X Pretrained language models represent the state of the art in NLP, but the successful construction of such models often requires large amounts of data and computational resources. Thus, the paucity of data for low-resource languages impedes the development of robust NLP capabilities for these languages. There has been some recent success in pretraining encoder-only models solely on a combination of low-resource African languages, exemplified by AfriBERTa. In this work, we extend the approach of “small data” pretraining to encoder–decoder models. We introduce AfriTeVa, a family of sequence-to-sequence models derived from T5 that are pretrained on 10 African languages from scratch. With a pretraining corpus of only around 1GB, we show that it is possible to achieve competitive downstream effectiveness for machine translation and text classification, compared to larger models trained on much more data. All the code and model checkpoints described in this work are publicly available at https://github.com/castorini/afriteva.
%R 10.18653/v1/2022.deeplo-1.14
%U https://aclanthology.org/2022.deeplo-1.14
%U https://doi.org/10.18653/v1/2022.deeplo-1.14
%P 126-135
Markdown (Informal)
[AfriTeVA: Extending “Small Data” Pretraining Approaches to Sequence-to-Sequence Models](https://aclanthology.org/2022.deeplo-1.14) (Jude Ogundepo et al., DeepLo 2022)
ACL
Odunayo Jude Ogundepo, Akintunde Oladipo, Mofetoluwa Adeyemi, Kelechi Ogueji, and Jimmy Lin. 2022. AfriTeVA: Extending “Small Data” Pretraining Approaches to Sequence-to-Sequence Models. In Proceedings of the Third Workshop on Deep Learning for Low-Resource Natural Language Processing, pages 126–135, Hybrid. Association for Computational Linguistics.