@inproceedings{jaisy-2026-metaswarm,
title = "{M}eta{S}warm at {A}bjad{M}ed: Forensic Optimization and Class-Balanced Discovery for Medical Diglossia in Abjad Scripts",
author = "Jaisy, Rahul",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.21/",
pages = "144--148",
abstract = "The classification of diglossic medical text presents a high-dimensional challenge defined by extreme class imbalance (N = 82) and the orthographic ambiguity of unvocalized Abjad scripts. While standard supervised learning often collapses into majority-class prediction due to the ``Long Tail'' distribution, we introduce a Human-in-the-Loop Forensic Optimization framework. Unlike static end-to-end pipelines, our approach decouples strategic hyperparameter tuning from high-throughput tactical execution (Elastic Compute). We leverage a rigorous Class-Balanced Focal Loss (CBFL) derived from the ``Effective Number of Samples'' theory (En) to stabilize the decision manifold against stochastic class dominance. Using a CAMELBERT-DA backbone optimized via a custom weighted trainer on Dual H200 GPUs, our system achieved a robust Public Leaderboard score of 0.3588. We further perform a ``Linguistic Error Topology'' analysis, utilizing UMAP projections and attention saliency, to demonstrate that generalization gaps are driven by dialectal ``Constraint Drift'' rather than stochastic model failure."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jaisy-2026-metaswarm">
<titleInfo>
<title>MetaSwarm at AbjadMed: Forensic Optimization and Class-Balanced Discovery for Medical Diglossia in Abjad Scripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Jaisy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The classification of diglossic medical text presents a high-dimensional challenge defined by extreme class imbalance (N = 82) and the orthographic ambiguity of unvocalized Abjad scripts. While standard supervised learning often collapses into majority-class prediction due to the “Long Tail” distribution, we introduce a Human-in-the-Loop Forensic Optimization framework. Unlike static end-to-end pipelines, our approach decouples strategic hyperparameter tuning from high-throughput tactical execution (Elastic Compute). We leverage a rigorous Class-Balanced Focal Loss (CBFL) derived from the “Effective Number of Samples” theory (En) to stabilize the decision manifold against stochastic class dominance. Using a CAMELBERT-DA backbone optimized via a custom weighted trainer on Dual H200 GPUs, our system achieved a robust Public Leaderboard score of 0.3588. We further perform a “Linguistic Error Topology” analysis, utilizing UMAP projections and attention saliency, to demonstrate that generalization gaps are driven by dialectal “Constraint Drift” rather than stochastic model failure.</abstract>
<identifier type="citekey">jaisy-2026-metaswarm</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.21/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>144</start>
<end>148</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MetaSwarm at AbjadMed: Forensic Optimization and Class-Balanced Discovery for Medical Diglossia in Abjad Scripts
%A Jaisy, Rahul
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F jaisy-2026-metaswarm
%X The classification of diglossic medical text presents a high-dimensional challenge defined by extreme class imbalance (N = 82) and the orthographic ambiguity of unvocalized Abjad scripts. While standard supervised learning often collapses into majority-class prediction due to the “Long Tail” distribution, we introduce a Human-in-the-Loop Forensic Optimization framework. Unlike static end-to-end pipelines, our approach decouples strategic hyperparameter tuning from high-throughput tactical execution (Elastic Compute). We leverage a rigorous Class-Balanced Focal Loss (CBFL) derived from the “Effective Number of Samples” theory (En) to stabilize the decision manifold against stochastic class dominance. Using a CAMELBERT-DA backbone optimized via a custom weighted trainer on Dual H200 GPUs, our system achieved a robust Public Leaderboard score of 0.3588. We further perform a “Linguistic Error Topology” analysis, utilizing UMAP projections and attention saliency, to demonstrate that generalization gaps are driven by dialectal “Constraint Drift” rather than stochastic model failure.
%U https://aclanthology.org/2026.abjadnlp-1.21/
%P 144-148
Markdown (Informal)
[MetaSwarm at AbjadMed: Forensic Optimization and Class-Balanced Discovery for Medical Diglossia in Abjad Scripts](https://aclanthology.org/2026.abjadnlp-1.21/) (Jaisy, AbjadNLP 2026)
ACL