% ACL Anthology record: paper in the EMNLP 2025 System Demonstrations proceedings, pages 696--706.
% Case-sensitive words in the title are brace-protected as whole words so sentence-casing styles keep them intact.
@inproceedings{hasan-etal-2025-gradet,
  title     = {{GraDeT-HTR}: A Resource-Efficient {Bengali} Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer},
  author    = {Hasan, Md. Mahmudul and
               Choudhury, Ahmed Nesar Tahsin and
               Hasan, Mahmudul and
               Khan, Md Mosaddek},
  editor    = {Habernal, Ivan and
               Schulam, Peter and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-demos.52/},
  pages     = {696--706},
  isbn      = {979-8-89176-334-0},
  abstract  = {Despite Bengali being the sixth most spoken language in the world, handwritten text recognition (HTR) systems for Bengali remain severely underdeveloped. The complexity of Bengali script{---}featuring conjuncts, diacritics, and highly variable handwriting styles{---}combined with a scarcity of annotated datasets makes this task particularly challenging. We present **GraDeT-HTR**, a resource-efficient Bengali handwritten text recognition system based on a **Gra**pheme-aware **De**coder-only **T**ransformer architecture. To address the unique challenges of Bengali script, we augment the performance of a decoder-only transformer by integrating a grapheme-based tokenizer and demonstrate that it significantly improves recognition accuracy compared to conventional subword tokenizers. Our model is pretrained on large-scale synthetic data and fine-tuned on real human-annotated samples, achieving state-of-the-art performance on multiple benchmark datasets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey hasan-etal-2025-gradet: four authors, host proceedings with three editors, pages 696-706. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hasan-etal-2025-gradet">
    <titleInfo>
      <title>GraDeT-HTR: A Resource-Efficient Bengali Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Md.</namePart>
      <namePart type="given">Mahmudul</namePart>
      <namePart type="family">Hasan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ahmed</namePart>
      <namePart type="given">Nesar</namePart>
      <namePart type="given">Tahsin</namePart>
      <namePart type="family">Choudhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mahmudul</namePart>
      <namePart type="family">Hasan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Md</namePart>
      <namePart type="given">Mosaddek</namePart>
      <namePart type="family">Khan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ivan</namePart>
        <namePart type="family">Habernal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Peter</namePart>
        <namePart type="family">Schulam</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-334-0</identifier>
    </relatedItem>
    <abstract>Despite Bengali being the sixth most spoken language in the world, handwritten text recognition (HTR) systems for Bengali remain severely underdeveloped. The complexity of Bengali script—featuring conjuncts, diacritics, and highly variable handwriting styles—combined with a scarcity of annotated datasets makes this task particularly challenging. We present **GraDeT-HTR**, a resource-efficient Bengali handwritten text recognition system based on a **Gra**pheme-aware **De**coder-only **T**ransformer architecture. To address the unique challenges of Bengali script, we augment the performance of a decoder-only transformer by integrating a grapheme-based tokenizer and demonstrate that it significantly improves recognition accuracy compared to conventional subword tokenizers. Our model is pretrained on large-scale synthetic data and fine-tuned on real human-annotated samples, achieving state-of-the-art performance on multiple benchmark datasets.</abstract>
    <identifier type="citekey">hasan-etal-2025-gradet</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-demos.52/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>696</start>
        <end>706</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T GraDeT-HTR: A Resource-Efficient Bengali Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer
%A Hasan, Md. Mahmudul
%A Choudhury, Ahmed Nesar Tahsin
%A Hasan, Mahmudul
%A Khan, Md Mosaddek
%Y Habernal, Ivan
%Y Schulam, Peter
%Y Tiedemann, Jörg
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-334-0
%F hasan-etal-2025-gradet
%X Despite Bengali being the sixth most spoken language in the world, handwritten text recognition (HTR) systems for Bengali remain severely underdeveloped. The complexity of Bengali script—featuring conjuncts, diacritics, and highly variable handwriting styles—combined with a scarcity of annotated datasets makes this task particularly challenging. We present **GraDeT-HTR**, a resource-efficient Bengali handwritten text recognition system based on a **Gra**pheme-aware **De**coder-only **T**ransformer architecture. To address the unique challenges of Bengali script, we augment the performance of a decoder-only transformer by integrating a grapheme-based tokenizer and demonstrate that it significantly improves recognition accuracy compared to conventional subword tokenizers. Our model is pretrained on large-scale synthetic data and fine-tuned on real human-annotated samples, achieving state-of-the-art performance on multiple benchmark datasets.
%U https://aclanthology.org/2025.emnlp-demos.52/
%P 696-706
Markdown (Informal)
[GraDeT-HTR: A Resource-Efficient Bengali Handwritten Text Recognition System utilizing Grapheme-based Tokenizer and Decoder-only Transformer](https://aclanthology.org/2025.emnlp-demos.52/) (Hasan et al., EMNLP 2025)
ACL