% SemEval-2026 Task 9 system paper; record URL: aclanthology.org/2026.semeval-1.366
% Case-sensitive words in title/booktitle are protected with whole-word braces
% so that sentence-casing bibliography styles keep their capitalisation.
@inproceedings{mahmud-etal-2026-argonauts,
  title     = {The {Argonauts} at {SemEval}-2026 Task 9: Multilingual Polarization Detection and Classification Using {LLM} Prompting and Transformer Fine-Tuning},
  author    = {Mahmud, Sha Newaz and
               Bhattacharjee, Sajib and
               Hossan, Md. Refaj and
               Ahmed, Kawsar and
               Hoque, Mohammed Moshiul},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.semeval-1.366/},
  pages     = {2920--2931},
  isbn      = {979-8-89176-414-9},
  abstract  = {Online polarization, defined as the pronounced division of public opinion into antagonistic groups, poses a significant threat to social cohesion. Automatic detection of polarization across diverse languages and cultures is essential for effective monitoring of online discourse. The challenge extends beyond identifying hate speech to recognizing more nuanced forms, including negative stereotypes, attribution of blame, and dehumanization. This work addresses SemEval-2026 Task 9, which focuses on detecting polarization in multiple languages. Specifically, Subtask 1 involves binary classification of message polarization, while Subtask 2 requires assigning multiple polarization labels in English and Bengali. For Subtask 1, Qwen3-14B is employed with structured few-shot prompting in 4-bit mode, yielding test macro-F1 scores of 0.847 for Bengali (4th place) and 0.808 for English (9th place). For Subtask 2, XLM-RoBERTa-large and RoBERTa-base are fine-tuned using an uneven loss ({\ensuremath{\gamma}}+ = 1, {\ensuremath{\gamma}}{\ensuremath{-}} =4) and label-specific thresholds, which increase development macro F1 by up to 24.6 points. The final test macro F1 for English is 0.454 (21st place). Analysis indicates that large language model prompting enhances binary polarization detection, while threshold adjustment is critical for addressing class imbalance in multi-label tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahmud-etal-2026-argonauts">
<titleInfo>
<title>The Argonauts at SemEval-2026 Task 9: Multilingual Polarization Detection and Classification Using LLM Prompting and Transformer Fine-Tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sha</namePart>
<namePart type="given">Newaz</namePart>
<namePart type="family">Mahmud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajib</namePart>
<namePart type="family">Bhattacharjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Refaj</namePart>
<namePart type="family">Hossan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kawsar</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="given">Moshiul</namePart>
<namePart type="family">Hoque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>Online polarization, defined as the pronounced division of public opinion into antagonistic groups, poses a significant threat to social cohesion. Automatic detection of polarization across diverse languages and cultures is essential for effective monitoring of online discourse. The challenge extends beyond identifying hate speech to recognizing more nuanced forms, including negative stereotypes, attribution of blame, and dehumanization. This work addresses SemEval-2026 Task 9, which focuses on detecting polarization in multiple languages. Specifically, Subtask 1 involves binary classification of message polarization, while Subtask 2 requires assigning multiple polarization labels in English and Bengali. For Subtask 1, Qwen3-14B is employed with structured few-shot prompting in 4-bit mode, yielding test macro-F1 scores of 0.847 for Bengali (4th place) and 0.808 for English (9th place). For Subtask 2, XLM-RoBERTa-large and RoBERTa-base are fine-tuned using an uneven loss (γ+ = 1, γ− = 4) and label-specific thresholds, which increase development macro F1 by up to 24.6 points. The final test macro F1 for English is 0.454 (21st place). Analysis indicates that large language model prompting enhances binary polarization detection, while threshold adjustment is critical for addressing class imbalance in multi-label tasks.</abstract>
<identifier type="citekey">mahmud-etal-2026-argonauts</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.366/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>2920</start>
<end>2931</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Argonauts at SemEval-2026 Task 9: Multilingual Polarization Detection and Classification Using LLM Prompting and Transformer Fine-Tuning
%A Mahmud, Sha Newaz
%A Bhattacharjee, Sajib
%A Hossan, Md. Refaj
%A Ahmed, Kawsar
%A Hoque, Mohammed Moshiul
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F mahmud-etal-2026-argonauts
%X Online polarization, defined as the pronounced division of public opinion into antagonistic groups, poses a significant threat to social cohesion. Automatic detection of polarization across diverse languages and cultures is essential for effective monitoring of online discourse. The challenge extends beyond identifying hate speech to recognizing more nuanced forms, including negative stereotypes, attribution of blame, and dehumanization. This work addresses SemEval-2026 Task 9, which focuses on detecting polarization in multiple languages. Specifically, Subtask 1 involves binary classification of message polarization, while Subtask 2 requires assigning multiple polarization labels in English and Bengali. For Subtask 1, Qwen3-14B is employed with structured few-shot prompting in 4-bit mode, yielding test macro-F1 scores of 0.847 for Bengali (4th place) and 0.808 for English (9th place). For Subtask 2, XLM-RoBERTa-large and RoBERTa-base are fine-tuned using an uneven loss (γ+ = 1, γ− = 4) and label-specific thresholds, which increase development macro F1 by up to 24.6 points. The final test macro F1 for English is 0.454 (21st place). Analysis indicates that large language model prompting enhances binary polarization detection, while threshold adjustment is critical for addressing class imbalance in multi-label tasks.
%U https://aclanthology.org/2026.semeval-1.366/
%P 2920-2931
Markdown (Informal)
[The Argonauts at SemEval-2026 Task 9: Multilingual Polarization Detection and Classification Using LLM Prompting and Transformer Fine-Tuning](https://aclanthology.org/2026.semeval-1.366/) (Mahmud et al., SemEval 2026)
ACL