@comment{Workshop paper in the CHiPSAL 2025 proceedings; ACL Anthology record 2025.chipsal-1.4.}
@inproceedings{tanjila-etal-2025-bengali,
  title     = {{Bengali} {ChartSumm}: A Benchmark Dataset and study on feasibility of Large Language Models on {Bengali} Chart to Text Summarization},
  author    = {Tanjila, Nahida Akter and
               Poushi, Afrin Sultana and
               Farhan, Sazid Abdullah and
               Kamal, Abu Raihan Mostofa and
               Hossain, Md. Azam and
               Ashmafee, Md. Hamjajul},
  editor    = {Sarveswaran, Kengatharaiyer and
               Vaidya, Ashwini and
               Bal, Bal Krishna and
               Shams, Sana and
               Thapa, Surendrabikram},
  booktitle = {Proceedings of the First Workshop on Challenges in Processing South Asian Languages ({CHiPSAL} 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2025.chipsal-1.4/},
  pages     = {35--45},
  abstract  = {In today's data-driven world, effectively organizing and presenting data is challenging, particularly for non-experts. While tabular formats structure data, they often lack intuitive insights; charts, however, prefer accessible and impactful visual summaries. Although recent advancements in NLP, powered by large language models (LLMs), have primarily benefited high-resource languages like English, low-resource languages such as Bengali{---}spoken by millions globally{---}still face significant data limitations. This research addresses this gap by introducing {\textquotedblleft}Bengali ChartSumm,{\textquotedblright} a benchmark dataset with 4,100 Bengali chart images, metadata, and summaries. This dataset facilitates the analysis of LLMs (mT5, BanglaT5, Gemma) in Bengali chart-to-text summarization, offering essential baselines and evaluations that enhance NLP research for low-resource languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tanjila-etal-2025-bengali">
<titleInfo>
<title>Bengali ChartSumm: A Benchmark Dataset and study on feasibility of Large Language Models on Bengali Chart to Text Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nahida</namePart>
<namePart type="given">Akter</namePart>
<namePart type="family">Tanjila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afrin</namePart>
<namePart type="given">Sultana</namePart>
<namePart type="family">Poushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sazid</namePart>
<namePart type="given">Abdullah</namePart>
<namePart type="family">Farhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abu</namePart>
<namePart type="given">Raihan</namePart>
<namePart type="given">Mostofa</namePart>
<namePart type="family">Kamal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Azam</namePart>
<namePart type="family">Hossain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Hamjajul</namePart>
<namePart type="family">Ashmafee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In today’s data-driven world, effectively organizing and presenting data is challenging, particularly for non-experts. While tabular formats structure data, they often lack intuitive insights; charts, however, prefer accessible and impactful visual summaries. Although recent advancements in NLP, powered by large language models (LLMs), have primarily benefited high-resource languages like English, low-resource languages such as Bengali—spoken by millions globally—still face significant data limitations. This research addresses this gap by introducing “Bengali ChartSumm,” a benchmark dataset with 4,100 Bengali chart images, metadata, and summaries. This dataset facilitates the analysis of LLMs (mT5, BanglaT5, Gemma) in Bengali chart-to-text summarization, offering essential baselines and evaluations that enhance NLP research for low-resource languages.</abstract>
<identifier type="citekey">tanjila-etal-2025-bengali</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.4/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>35</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bengali ChartSumm: A Benchmark Dataset and study on feasibility of Large Language Models on Bengali Chart to Text Summarization
%A Tanjila, Nahida Akter
%A Poushi, Afrin Sultana
%A Farhan, Sazid Abdullah
%A Kamal, Abu Raihan Mostofa
%A Hossain, Md. Azam
%A Ashmafee, Md. Hamjajul
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Bal, Bal Krishna
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F tanjila-etal-2025-bengali
%X In today’s data-driven world, effectively organizing and presenting data is challenging, particularly for non-experts. While tabular formats structure data, they often lack intuitive insights; charts, however, prefer accessible and impactful visual summaries. Although recent advancements in NLP, powered by large language models (LLMs), have primarily benefited high-resource languages like English, low-resource languages such as Bengali—spoken by millions globally—still face significant data limitations. This research addresses this gap by introducing “Bengali ChartSumm,” a benchmark dataset with 4,100 Bengali chart images, metadata, and summaries. This dataset facilitates the analysis of LLMs (mT5, BanglaT5, Gemma) in Bengali chart-to-text summarization, offering essential baselines and evaluations that enhance NLP research for low-resource languages.
%U https://aclanthology.org/2025.chipsal-1.4/
%P 35-45
Markdown (Informal)
[Bengali ChartSumm: A Benchmark Dataset and study on feasibility of Large Language Models on Bengali Chart to Text Summarization](https://aclanthology.org/2025.chipsal-1.4/) (Tanjila et al., CHiPSAL 2025)
ACL