@inproceedings{islam-etal-2026-bornodrishti,
title = "{B}orno{D}rishti: Leveraging Vision Encoders and Domain-Adaptive Learning for {B}angla {OCR} on Diverse Documents",
author = "Islam, S M Jishanul and
Hasan, Md Mehedi and
Ovi, Masbul Haider and
Rabby, Akm Shahariar Azad and
Rahman, Fuad",
editor = {Matusevych, Yevgen and
Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
Aletras, Nikolaos},
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 5: Industry Track)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-industry.20/",
pages = "278--286",
ISBN = "979-8-89176-384-5",
abstract = "OCR for Bangla scripts remains a challenging problem, with existing solutions limited to single-domain processing. Current approaches lack a unified vision encoder that can understand diverse Bangla script variations, hindering practical deployment. We present BornoDrishti, the first unified OCR system based on the vision transformer that accurately recognizes both printed and handwritten Bangla scripts within a single model. Our approach introduces a novel domain objective that enables the model to learn domain-invariant representations while preserving script-specific features, eliminating the need for separate domain experts. BornoDrishti achieves competitive accuracy across both domains, setting state-of-the-art performance for printed scripts and demonstrating that a single unified model can match or exceed specialized uni-domain systems. We evaluate our model against state-of-the-art domain-specific and cross-domain OCR systems. This work establishes a foundation for advancing practical applications by using a unified multi-domain OCR system for complex Bangla scripts."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="islam-etal-2026-bornodrishti">
<titleInfo>
<title>BornoDrishti: Leveraging Vision Encoders and Domain-Adaptive Learning for Bangla OCR on Diverse Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">S</namePart>
<namePart type="given">M</namePart>
<namePart type="given">Jishanul</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Mehedi</namePart>
<namePart type="family">Hasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masbul</namePart>
<namePart type="given">Haider</namePart>
<namePart type="family">Ovi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akm</namePart>
<namePart type="given">Shahariar</namePart>
<namePart type="given">Azad</namePart>
<namePart type="family">Rabby</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fuad</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yevgen</namePart>
<namePart type="family">Matusevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gülşen</namePart>
<namePart type="family">Eryiğit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-384-5</identifier>
</relatedItem>
<abstract>OCR for Bangla scripts remains a challenging problem, with existing solutions limited to single-domain processing. Current approaches lack a unified vision encoder that can understand diverse Bangla script variations, hindering practical deployment. We present BornoDrishti, the first unified OCR system based on the vision transformer that accurately recognizes both printed and handwritten Bangla scripts within a single model. Our approach introduces a novel domain objective that enables the model to learn domain-invariant representations while preserving script-specific features, eliminating the need for separate domain experts. BornoDrishti achieves competitive accuracy across both domains, setting state-of-the-art performance for printed scripts and demonstrating that a single unified model can match or exceed specialized uni-domain systems. We evaluate our model against state-of-the-art domain-specific and cross-domain OCR systems. This work establishes a foundation for advancing practical applications by using a unified multi-domain OCR system for complex Bangla scripts.</abstract>
<identifier type="citekey">islam-etal-2026-bornodrishti</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-industry.20/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>278</start>
<end>286</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BornoDrishti: Leveraging Vision Encoders and Domain-Adaptive Learning for Bangla OCR on Diverse Documents
%A Islam, S. M. Jishanul
%A Hasan, Md Mehedi
%A Ovi, Masbul Haider
%A Rabby, Akm Shahariar Azad
%A Rahman, Fuad
%Y Matusevych, Yevgen
%Y Eryiğit, Gülşen
%Y Aletras, Nikolaos
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-384-5
%F islam-etal-2026-bornodrishti
%X OCR for Bangla scripts remains a challenging problem, with existing solutions limited to single-domain processing. Current approaches lack a unified vision encoder that can understand diverse Bangla script variations, hindering practical deployment. We present BornoDrishti, the first unified OCR system based on the vision transformer that accurately recognizes both printed and handwritten Bangla scripts within a single model. Our approach introduces a novel domain objective that enables the model to learn domain-invariant representations while preserving script-specific features, eliminating the need for separate domain experts. BornoDrishti achieves competitive accuracy across both domains, setting state-of-the-art performance for printed scripts and demonstrating that a single unified model can match or exceed specialized uni-domain systems. We evaluate our model against state-of-the-art domain-specific and cross-domain OCR systems. This work establishes a foundation for advancing practical applications by using a unified multi-domain OCR system for complex Bangla scripts.
%U https://aclanthology.org/2026.eacl-industry.20/
%P 278-286
Markdown (Informal)
[BornoDrishti: Leveraging Vision Encoders and Domain-Adaptive Learning for Bangla OCR on Diverse Documents](https://aclanthology.org/2026.eacl-industry.20/) (Islam et al., EACL 2026)
ACL