@inproceedings{huynh-etal-2026-hcmus,
title = "{HCMUS}{\_}{P}rison{D}ilemma at {A}bjad{A}uthor{ID} Shared Task: Less is More with Base Models",
author = "Huynh, Trung Kiet and
Dao Sy, Duy Minh and
Tran, Nguyen Chi and
Hoa, Pham Phu and
Quy, Nguyen Lam Phu and
Tran, Truong Bao",
booktitle = "Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.abjadnlp-1.54/",
pages = "448--452",
abstract = "We present our approach to the AbjadNLP 2026 Arabic Authorship Identification shared task, achieving 4th place. Our key finding is that AraBERT-base (110M) outperforms AraBERT-large (340M) on the test set with macro F1 of 0.8449 versus 0.8096, despite lower validation scores. We handle long passages via sliding window chunking with mean pooling, and use a two-stage classification head with dual dropout for regularization. Per-class analysis reveals that translated works achieve perfect F1 while classical poets remain challenging due to shared formal structures. Our results challenge the ``scale is all you need'' assumption for stylometric tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huynh-etal-2026-hcmus">
<titleInfo>
<title>HCMUS_PrisonDilemma at AbjadAuthorID Shared Task: Less is More with Base Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trung</namePart>
<namePart type="given">Kiet</namePart>
<namePart type="family">Huynh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Duy</namePart>
<namePart type="given">Minh</namePart>
<namePart type="family">Dao Sy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="given">Chi</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pham</namePart>
<namePart type="given">Phu</namePart>
<namePart type="family">Hoa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="given">Lam</namePart>
<namePart type="given">Phu</namePart>
<namePart type="family">Quy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Truong</namePart>
<namePart type="given">Bao</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our approach to the AbjadNLP 2026 Arabic Authorship Identification shared task, achieving 4th place. Our key finding is that AraBERT-base (110M) outperforms AraBERT-large (340M) on the test set with macro F1 of 0.8449 versus 0.8096, despite lower validation scores. We handle long passages via sliding window chunking with mean pooling, and use a two-stage classification head with dual dropout for regularization. Per-class analysis reveals that translated works achieve perfect F1 while classical poets remain challenging due to shared formal structures. Our results challenge the “scale is all you need” assumption for stylometric tasks.</abstract>
<identifier type="citekey">huynh-etal-2026-hcmus</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.54/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>448</start>
<end>452</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HCMUS_PrisonDilemma at AbjadAuthorID Shared Task: Less is More with Base Models
%A Huynh, Trung Kiet
%A Dao Sy, Duy Minh
%A Tran, Nguyen Chi
%A Hoa, Pham Phu
%A Quy, Nguyen Lam Phu
%A Tran, Truong Bao
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F huynh-etal-2026-hcmus
%X We present our approach to the AbjadNLP 2026 Arabic Authorship Identification shared task, achieving 4th place. Our key finding is that AraBERT-base (110M) outperforms AraBERT-large (340M) on the test set with macro F1 of 0.8449 versus 0.8096, despite lower validation scores. We handle long passages via sliding window chunking with mean pooling, and use a two-stage classification head with dual dropout for regularization. Per-class analysis reveals that translated works achieve perfect F1 while classical poets remain challenging due to shared formal structures. Our results challenge the “scale is all you need” assumption for stylometric tasks.
%U https://aclanthology.org/2026.abjadnlp-1.54/
%P 448-452
Markdown (Informal)
[HCMUS_PrisonDilemma at AbjadAuthorID Shared Task: Less is More with Base Models](https://aclanthology.org/2026.abjadnlp-1.54/) (Huynh et al., AbjadNLP 2026)
ACL