@inproceedings{bhattacharjee-etal-2023-banglanlg,
title = "{B}angla{NLG} and {B}angla{T}5: Benchmarks and Resources for Evaluating Low-Resource Natural Language Generation in {B}angla",
author = "Bhattacharjee, Abhik and
Hasan, Tahmid and
Ahmad, Wasi Uddin and
Shahriyar, Rifat",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-eacl.54",
doi = "10.18653/v1/2023.findings-eacl.54",
pages = "726--735",
abstract = "This work presents {`}BanglaNLG,{'} a comprehensive benchmark for evaluating natural language generation (NLG) models in Bangla, a widely spoken yet low-resource language. We aggregate six challenging conditional text generation tasks under the BanglaNLG benchmark, introducing a new dataset on dialogue generation in the process. Furthermore, using a clean corpus of 27.5 GB of Bangla data, we pretrain {`}BanglaT5{'}, a sequence-to-sequence Transformer language model for Bangla. BanglaT5 achieves state-of-the-art performance in all of these tasks, outperforming several multilingual models by up to 9{\%} absolute gain and 32{\%} relative gain. We are making the new dialogue dataset and the BanglaT5 model publicly available at \url{https://github.com/csebuetnlp/BanglaNLG} in the hope of advancing future research on Bangla NLG.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhattacharjee-etal-2023-banglanlg">
<titleInfo>
<title>BanglaNLG and BanglaT5: Benchmarks and Resources for Evaluating Low-Resource Natural Language Generation in Bangla</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Bhattacharjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tahmid</namePart>
<namePart type="family">Hasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wasi</namePart>
<namePart type="given">Uddin</namePart>
<namePart type="family">Ahmad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rifat</namePart>
<namePart type="family">Shahriyar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This work presents ‘BanglaNLG,’ a comprehensive benchmark for evaluating natural language generation (NLG) models in Bangla, a widely spoken yet low-resource language. We aggregate six challenging conditional text generation tasks under the BanglaNLG benchmark, introducing a new dataset on dialogue generation in the process. Furthermore, using a clean corpus of 27.5 GB of Bangla data, we pretrain ‘BanglaT5’, a sequence-to-sequence Transformer language model for Bangla. BanglaT5 achieves state-of-the-art performance in all of these tasks, outperforming several multilingual models by up to 9% absolute gain and 32% relative gain. We are making the new dialogue dataset and the BanglaT5 model publicly available at https://github.com/csebuetnlp/BanglaNLG in the hope of advancing future research on Bangla NLG.</abstract>
<identifier type="citekey">bhattacharjee-etal-2023-banglanlg</identifier>
<identifier type="doi">10.18653/v1/2023.findings-eacl.54</identifier>
<location>
<url>https://aclanthology.org/2023.findings-eacl.54</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>726</start>
<end>735</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BanglaNLG and BanglaT5: Benchmarks and Resources for Evaluating Low-Resource Natural Language Generation in Bangla
%A Bhattacharjee, Abhik
%A Hasan, Tahmid
%A Ahmad, Wasi Uddin
%A Shahriyar, Rifat
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Findings of the Association for Computational Linguistics: EACL 2023
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F bhattacharjee-etal-2023-banglanlg
%X This work presents ‘BanglaNLG,’ a comprehensive benchmark for evaluating natural language generation (NLG) models in Bangla, a widely spoken yet low-resource language. We aggregate six challenging conditional text generation tasks under the BanglaNLG benchmark, introducing a new dataset on dialogue generation in the process. Furthermore, using a clean corpus of 27.5 GB of Bangla data, we pretrain ‘BanglaT5’, a sequence-to-sequence Transformer language model for Bangla. BanglaT5 achieves state-of-the-art performance in all of these tasks, outperforming several multilingual models by up to 9% absolute gain and 32% relative gain. We are making the new dialogue dataset and the BanglaT5 model publicly available at https://github.com/csebuetnlp/BanglaNLG in the hope of advancing future research on Bangla NLG.
%R 10.18653/v1/2023.findings-eacl.54
%U https://aclanthology.org/2023.findings-eacl.54
%U https://doi.org/10.18653/v1/2023.findings-eacl.54
%P 726-735
Markdown (Informal)
[BanglaNLG and BanglaT5: Benchmarks and Resources for Evaluating Low-Resource Natural Language Generation in Bangla](https://aclanthology.org/2023.findings-eacl.54) (Bhattacharjee et al., Findings 2023)
ACL