BibTeX
@inproceedings{armengol-estape-etal-2022-multilingual,
title = "On the Multilingual Capabilities of Very Large-Scale {E}nglish Language Models",
author = "Armengol-Estap{\'e}, Jordi and
de Gibert Bonet, Ona and
Melero, Maite",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.327",
pages = "3056--3068",
abstract = "Generative Pre-trained Transformers (GPTs) have recently been scaled to unprecedented sizes in the history of machine learning. These models, solely trained on the language modeling objective, have been shown to exhibit outstanding zero, one, and few-shot learning capabilities in a number of different tasks. Nevertheless, aside from anecdotal experiences, little is known regarding their multilingual capabilities, given the fact that the pre-training corpus is almost entirely composed of English text. In this work, we investigate its potential and limits in three tasks: extractive question-answering, text summarization and natural language generation for five different languages, as well as the effect of scale in terms of model size. Our results show that GPT-3 can be almost as useful for many languages as it is for English, with room for improvement if optimization of the tokenization is addressed.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="armengol-estape-etal-2022-multilingual">
    <titleInfo>
      <title>On the Multilingual Capabilities of Very Large-Scale English Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jordi</namePart>
      <namePart type="family">Armengol-Estapé</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ona</namePart>
      <namePart type="family">de Gibert Bonet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maite</namePart>
      <namePart type="family">Melero</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
      </titleInfo>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Generative Pre-trained Transformers (GPTs) have recently been scaled to unprecedented sizes in the history of machine learning. These models, trained solely on the language modeling objective, have been shown to exhibit outstanding zero-, one-, and few-shot learning capabilities on a number of different tasks. Nevertheless, aside from anecdotal experience, little is known about their multilingual capabilities, given that the pre-training corpus is almost entirely composed of English text. In this work, we investigate their potential and limits in three tasks: extractive question answering, text summarization, and natural language generation, across five different languages, as well as the effect of model scale. Our results show that GPT-3 can be almost as useful for many languages as it is for English, with room for improvement if optimization of the tokenization is addressed.</abstract>
    <identifier type="citekey">armengol-estape-etal-2022-multilingual</identifier>
    <location>
      <url>https://aclanthology.org/2022.lrec-1.327</url>
    </location>
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>3056</start>
        <end>3068</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T On the Multilingual Capabilities of Very Large-Scale English Language Models
%A Armengol-Estapé, Jordi
%A de Gibert Bonet, Ona
%A Melero, Maite
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F armengol-estape-etal-2022-multilingual
%X Generative Pre-trained Transformers (GPTs) have recently been scaled to unprecedented sizes in the history of machine learning. These models, trained solely on the language modeling objective, have been shown to exhibit outstanding zero-, one-, and few-shot learning capabilities on a number of different tasks. Nevertheless, aside from anecdotal experience, little is known about their multilingual capabilities, given that the pre-training corpus is almost entirely composed of English text. In this work, we investigate their potential and limits in three tasks: extractive question answering, text summarization, and natural language generation, across five different languages, as well as the effect of model scale. Our results show that GPT-3 can be almost as useful for many languages as it is for English, with room for improvement if optimization of the tokenization is addressed.
%U https://aclanthology.org/2022.lrec-1.327
%P 3056-3068

Markdown (Informal)
[On the Multilingual Capabilities of Very Large-Scale English Language Models](https://aclanthology.org/2022.lrec-1.327) (Armengol-Estapé et al., LREC 2022)
ACL
Jordi Armengol-Estapé, Ona de Gibert Bonet, and Maite Melero. 2022. On the Multilingual Capabilities of Very Large-Scale English Language Models. In Proceedings of the Thirteenth Language Resources and Evaluation Conference, pages 3056–3068, Marseille, France. European Language Resources Association.