@inproceedings{khatri-etal-2023-study,
title = "A Study of Multilingual versus Meta-Learning for Language Model Pre-Training for Adaptation to Unseen Low Resource Languages",
author = "Khatri, Jyotsana and
Murthy, Rudra and
Azad, Amar Prakash and
Bhattacharyya, Pushpak",
editor = "Utiyama, Masao and
Wang, Rui",
booktitle = "Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track",
month = sep,
year = "2023",
address = "Macau SAR, China",
publisher = "Asia-Pacific Association for Machine Translation",
url = "https://aclanthology.org/2023.mtsummit-research.3",
pages = "26--34",
abstract = "In this paper, we compare two approaches to train a multilingual language model: (i) simple multilingual learning using data-mixing, and (ii) meta-learning. We examine the performance of these models by extending them to unseen language pairs and further finetune them for the task of unsupervised NMT. We perform several experiments with varying amounts of data and give a comparative analysis of the approaches. We observe that both approaches give a comparable performance, and meta-learning gives slightly better results in a few cases of low amounts of data. For Oriya-Punjabi language pair, meta-learning performs better than multilingual learning when using 2M, and 3M sentences.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khatri-etal-2023-study">
<titleInfo>
<title>A Study of Multilingual versus Meta-Learning for Language Model Pre-Training for Adaptation to Unseen Low Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jyotsana</namePart>
<namePart type="family">Khatri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudra</namePart>
<namePart type="family">Murthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amar</namePart>
<namePart type="given">Prakash</namePart>
<namePart type="family">Azad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masao</namePart>
<namePart type="family">Utiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Asia-Pacific Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Macau SAR, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>In this paper, we compare two approaches to training a multilingual language model: (i) simple multilingual learning using data-mixing, and (ii) meta-learning. We examine the performance of these models by extending them to unseen language pairs and further finetuning them for the task of unsupervised NMT. We perform several experiments with varying amounts of data and give a comparative analysis of the approaches. We observe that both approaches give comparable performance, and meta-learning gives slightly better results in a few cases with low amounts of data. For the Oriya-Punjabi language pair, meta-learning performs better than multilingual learning when using 2M and 3M sentences.</abstract>
    <identifier type="citekey">khatri-etal-2023-study</identifier>
    <location>
      <url>https://aclanthology.org/2023.mtsummit-research.3</url>
    </location>
    <part>
      <date>2023-09</date>
      <extent unit="page">
        <start>26</start>
        <end>34</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Study of Multilingual versus Meta-Learning for Language Model Pre-Training for Adaptation to Unseen Low Resource Languages
%A Khatri, Jyotsana
%A Murthy, Rudra
%A Azad, Amar Prakash
%A Bhattacharyya, Pushpak
%Y Utiyama, Masao
%Y Wang, Rui
%S Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track
%D 2023
%8 September
%I Asia-Pacific Association for Machine Translation
%C Macau SAR, China
%F khatri-etal-2023-study
%X In this paper, we compare two approaches to training a multilingual language model: (i) simple multilingual learning using data-mixing, and (ii) meta-learning. We examine the performance of these models by extending them to unseen language pairs and further finetuning them for the task of unsupervised NMT. We perform several experiments with varying amounts of data and give a comparative analysis of the approaches. We observe that both approaches give comparable performance, and meta-learning gives slightly better results in a few cases with low amounts of data. For the Oriya-Punjabi language pair, meta-learning performs better than multilingual learning when using 2M and 3M sentences.
%U https://aclanthology.org/2023.mtsummit-research.3
%P 26-34
Markdown (Informal)
[A Study of Multilingual versus Meta-Learning for Language Model Pre-Training for Adaptation to Unseen Low Resource Languages](https://aclanthology.org/2023.mtsummit-research.3) (Khatri et al., MTSummit 2023)
ACL
Jyotsana Khatri, Rudra Murthy, Amar Prakash Azad, and Pushpak Bhattacharyya. 2023. A Study of Multilingual versus Meta-Learning for Language Model Pre-Training for Adaptation to Unseen Low Resource Languages. In Proceedings of Machine Translation Summit XIX, Vol. 1: Research Track, pages 26–34, Macau SAR, China. Asia-Pacific Association for Machine Translation.