@inproceedings{telem-joyson-etal-2023-big,
title = "Can Big Models Help Diverse Languages? Investigating Large Pretrained Multilingual Models for Machine Translation of {I}ndian Languages",
author = "Singh, Telem Joyson and
Singh, Sanasam Ranbir and
Sarmah, Priyankoo",
editor = "D. Pawar, Jyoti and
Lalitha Devi, Sobha",
booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2023",
address = "Goa University, Goa, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2023.icon-1.66",
pages = "663--669",
abstract = "Machine translation of Indian languages is challenging due to several factors, including linguistic diversity, limited parallel data, language divergence, and complex morphology. Recently, large pre-trained multilingual models have shown promise in improving translation quality. In this paper, we conduct a large-scale study on applying large pre-trained models for English-Indic machine translation through transfer learning across languages and domains. This study systematically evaluates the practical gains these models can provide and analyzes their capabilities for the translation of the Indian language by transfer learning. Specifically, we experiment with several models, including Meta{'}s mBART, mBART-manyto-many, NLLB-200, M2M-100, and Google{'}s MT5. These models are fine-tuned on small, high-quality English-Indic parallel data across languages and domains. Our findings show that adapting large pre-trained models to particular languages by fine-tuning improves translation quality across the Indic languages, even for languages unseen during pretraining. Domain adaptation through continued fine-tuning improves results. Our study provides insights into utilizing large pretrained models to address the distinct challenges of MT of Indian languages.",
}
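For readers who want a concrete starting point, the sketch below illustrates the kind of fine-tuning the abstract describes: adapting a pretrained multilingual MT model to one English-Indic direction with a small parallel set, using Hugging Face Transformers. This is an assumption-laden illustration, not the paper's exact recipe; the checkpoint name (facebook/mbart-large-50-many-to-many-mmt), language codes, toy data, and hyperparameters are placeholders chosen for the example.

# Minimal illustrative sketch (not the authors' exact setup): fine-tune a
# pretrained multilingual MT model on a tiny English->Hindi parallel set.
from datasets import Dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

model_name = "facebook/mbart-large-50-many-to-many-mmt"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, src_lang="en_XX", tgt_lang="hi_IN")
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Placeholder parallel data; the paper fine-tunes on small, high-quality
# English-Indic corpora across languages and domains.
pairs = [
    {"en": "How are you?", "hi": "आप कैसे हैं?"},
    {"en": "The weather is nice today.", "hi": "आज मौसम अच्छा है।"},
]
train_ds = Dataset.from_list(pairs)

def preprocess(example):
    # text_target routes the Hindi side through the target-language tokenizer.
    return tokenizer(example["en"], text_target=example["hi"],
                     truncation=True, max_length=128)

train_ds = train_ds.map(preprocess, remove_columns=["en", "hi"])

args = Seq2SeqTrainingArguments(
    output_dir="mbart50-en-hi-ft",
    per_device_train_batch_size=8,
    learning_rate=3e-5,      # assumed value
    num_train_epochs=3,      # assumed value
    predict_with_generate=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()

# Domain adaptation via continued fine-tuning, as described in the abstract,
# would repeat trainer.train() with an in-domain parallel dataset.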
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="telem-joyson-etal-2023-big">
<titleInfo>
<title>Can Big Models Help Diverse Languages? Investigating Large Pretrained Multilingual Models for Machine Translation of Indian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Telem</namePart>
<namePart type="given">Joyson</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanasam</namePart>
<namePart type="given">Ranbir</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priyankoo</namePart>
<namePart type="family">Sarmah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jyoti</namePart>
<namePart type="family">D. Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">Lalitha Devi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Goa University, Goa, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation of Indian languages is challenging due to several factors, including linguistic diversity, limited parallel data, language divergence, and complex morphology. Recently, large pre-trained multilingual models have shown promise in improving translation quality. In this paper, we conduct a large-scale study on applying large pre-trained models to English-Indic machine translation through transfer learning across languages and domains. This study systematically evaluates the practical gains these models can provide and analyzes their capabilities for the translation of Indian languages via transfer learning. Specifically, we experiment with several models, including Meta’s mBART, mBART-many-to-many, NLLB-200, M2M-100, and Google’s mT5. These models are fine-tuned on small, high-quality English-Indic parallel data across languages and domains. Our findings show that adapting large pre-trained models to particular languages by fine-tuning improves translation quality across the Indic languages, even for languages unseen during pretraining. Domain adaptation through continued fine-tuning further improves results. Our study provides insights into utilizing large pretrained models to address the distinct challenges of MT of Indian languages.</abstract>
<identifier type="citekey">telem-joyson-etal-2023-big</identifier>
<location>
<url>https://aclanthology.org/2023.icon-1.66</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>663</start>
<end>669</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Big Models Help Diverse Languages? Investigating Large Pretrained Multilingual Models for Machine Translation of Indian Languages
%A Singh, Telem Joyson
%A Singh, Sanasam Ranbir
%A Sarmah, Priyankoo
%Y D. Pawar, Jyoti
%Y Lalitha Devi, Sobha
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F telem-joyson-etal-2023-big
%X Machine translation of Indian languages is challenging due to several factors, including linguistic diversity, limited parallel data, language divergence, and complex morphology. Recently, large pre-trained multilingual models have shown promise in improving translation quality. In this paper, we conduct a large-scale study on applying large pre-trained models to English-Indic machine translation through transfer learning across languages and domains. This study systematically evaluates the practical gains these models can provide and analyzes their capabilities for the translation of Indian languages via transfer learning. Specifically, we experiment with several models, including Meta’s mBART, mBART-many-to-many, NLLB-200, M2M-100, and Google’s mT5. These models are fine-tuned on small, high-quality English-Indic parallel data across languages and domains. Our findings show that adapting large pre-trained models to particular languages by fine-tuning improves translation quality across the Indic languages, even for languages unseen during pretraining. Domain adaptation through continued fine-tuning further improves results. Our study provides insights into utilizing large pretrained models to address the distinct challenges of MT of Indian languages.
%U https://aclanthology.org/2023.icon-1.66
%P 663-669
Markdown (Informal)
[Can Big Models Help Diverse Languages? Investigating Large Pretrained Multilingual Models for Machine Translation of Indian Languages](https://aclanthology.org/2023.icon-1.66) (Singh et al., ICON 2023)
ACL
Telem Joyson Singh, Sanasam Ranbir Singh, and Priyankoo Sarmah. 2023. [Can Big Models Help Diverse Languages? Investigating Large Pretrained Multilingual Models for Machine Translation of Indian Languages](https://aclanthology.org/2023.icon-1.66). In *Proceedings of the 20th International Conference on Natural Language Processing (ICON)*, pages 663–669, Goa University, Goa, India. NLP Association of India (NLPAI).