@inproceedings{dhamecha-etal-2025-team,
title = "Team Horizon at {BHASHA} Task 1: Multilingual {I}ndic{GEC} with Transformer-based Grammatical Error Correction Models",
author = "Dhamecha, Manav and
Jaat, Sunil and
Damor, Gaurav and
Mishra, Pruthwik",
editor = "Bhattacharya, Arnab and
Goyal, Pawan and
Ghosh, Saptarshi and
Ghosh, Kripabandhu",
booktitle = "Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bhasha-1.14/",
pages = "142--146",
ISBN = "979-8-89176-313-5",
abstract = "This paper presents Team Horizon{'}s approach to the BHASHA Shared Task 1: Indic Grammatical Error Correction (IndicGEC). We explore transformer-based multilingual models {---} mT5-small and IndicBART {---} to correct grammatical and semantic errors across five Indian languages: Bangla, Hindi, Tamil, Telugu, and Malayalam. Due to limited annotated data, we developed a synthetic data augmentation pipeline that introduces realistic linguistic errors under ten categories, simulating natural mistakes found in Indic scripts. Our fine-tuned models achieved competitive performance with GLEU scores of 86.03 (Tamil), 72.00 (Telugu), 82.69 (Bangla), 80.44 (Hindi), and 84.36 (Malayalam). We analyze the impact of dataset scaling, multilingual fine-tuning, and training epochs, showing that linguistically grounded augmentation can significantly improve grammatical correction accuracy in low-resource Indic languages."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dhamecha-etal-2025-team">
<titleInfo>
<title>Team Horizon at BHASHA Task 1: Multilingual IndicGEC with Transformer-based Grammatical Error Correction Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manav</namePart>
<namePart type="family">Dhamecha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunil</namePart>
<namePart type="family">Jaat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaurav</namePart>
<namePart type="family">Damor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pruthwik</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arnab</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawan</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saptarshi</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kripabandhu</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-313-5</identifier>
</relatedItem>
<abstract>This paper presents Team Horizon’s approach to the BHASHA Shared Task 1: Indic Grammatical Error Correction (IndicGEC). We explore transformer-based multilingual models — mT5-small and IndicBART — to correct grammatical and semantic errors across five Indian languages: Bangla, Hindi, Tamil, Telugu, and Malayalam. Due to limited annotated data, we developed a synthetic data augmentation pipeline that introduces realistic linguistic errors under ten categories, simulating natural mistakes found in Indic scripts. Our fine-tuned models achieved competitive performance with GLEU scores of 86.03 (Tamil), 72.00 (Telugu), 82.69 (Bangla), 80.44 (Hindi), and 84.36 (Malayalam). We analyze the impact of dataset scaling, multilingual fine-tuning, and training epochs, showing that linguistically grounded augmentation can significantly improve grammatical correction accuracy in low-resource Indic languages.</abstract>
<identifier type="citekey">dhamecha-etal-2025-team</identifier>
<location>
<url>https://aclanthology.org/2025.bhasha-1.14/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>142</start>
<end>146</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Team Horizon at BHASHA Task 1: Multilingual IndicGEC with Transformer-based Grammatical Error Correction Models
%A Dhamecha, Manav
%A Jaat, Sunil
%A Damor, Gaurav
%A Mishra, Pruthwik
%Y Bhattacharya, Arnab
%Y Goyal, Pawan
%Y Ghosh, Saptarshi
%Y Ghosh, Kripabandhu
%S Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-313-5
%F dhamecha-etal-2025-team
%X This paper presents Team Horizon’s approach to the BHASHA Shared Task 1: Indic Grammatical Error Correction (IndicGEC). We explore transformer-based multilingual models — mT5-small and IndicBART — to correct grammatical and semantic errors across five Indian languages: Bangla, Hindi, Tamil, Telugu, and Malayalam. Due to limited annotated data, we developed a synthetic data augmentation pipeline that introduces realistic linguistic errors under ten categories, simulating natural mistakes found in Indic scripts. Our fine-tuned models achieved competitive performance with GLEU scores of 86.03 (Tamil), 72.00 (Telugu), 82.69 (Bangla), 80.44 (Hindi), and 84.36 (Malayalam). We analyze the impact of dataset scaling, multilingual fine-tuning, and training epochs, showing that linguistically grounded augmentation can significantly improve grammatical correction accuracy in low-resource Indic languages.
%U https://aclanthology.org/2025.bhasha-1.14/
%P 142-146
Markdown (Informal)
[Team Horizon at BHASHA Task 1: Multilingual IndicGEC with Transformer-based Grammatical Error Correction Models](https://aclanthology.org/2025.bhasha-1.14/) (Dhamecha et al., BHASHA 2025)
ACL

Manav Dhamecha, Sunil Jaat, Gaurav Damor, and Pruthwik Mishra. 2025. [Team Horizon at BHASHA Task 1: Multilingual IndicGEC with Transformer-based Grammatical Error Correction Models](https://aclanthology.org/2025.bhasha-1.14/). In *Proceedings of the 1st Workshop on Benchmarks, Harmonization, Annotation, and Standardization for Human-Centric AI in Indian Languages (BHASHA 2025)*, pages 142–146, Mumbai, India. Association for Computational Linguistics.
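
The abstract reports per-language GLEU scores (e.g., 86.03 for Tamil). For readers unfamiliar with the metric, below is a minimal sketch of sentence-level GLEU scoring using NLTK's implementation; the example sentence pair and the choice of NLTK as scorer are illustrative assumptions, not the shared task's official evaluation script.

```python
# Minimal sketch of sentence-level GLEU, the metric reported in the abstract.
# Assumption: NLTK's GLEU implementation is used purely for illustration; the
# shared task may ship its own scorer.
from nltk.translate.gleu_score import sentence_gleu

# Illustrative Hindi pair (not from the paper's data): gold correction and a
# model hypothesis, whitespace-tokenized.
reference = "वह रोज़ स्कूल जाता है ।".split()
hypothesis = "वह रोज़ स्कूल जाता हैं ।".split()

# sentence_gleu takes a list of reference token lists and one hypothesis,
# and returns a score in [0, 1] based on n-gram overlap (default n = 1..4).
score = sentence_gleu([reference], hypothesis)
print(f"GLEU: {score:.4f}")  # 1.0 only for an exact n-gram match
```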