@inproceedings{bhattacharjee-etal-2026-argonauts,
title = "The Argonauts at {S}em{E}val 2026 Task 6: Large Language Models for Response Clarity Classification: Prompting, Fine-Tuning, and Data-Centric Approaches",
author = "Bhattacharjee, Sajib and
Mahmud, Sha Newaz and
Hossan, Md. Refaj and
Ahmed, Kawsar and
Hoque, Mohammed Moshiul",
editor = "Kochmar, Ekaterina and
Ghosh, Debanjan and
North, Kai and
Komachi, Mamoru",
booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.semeval-1.344/",
pages = "2729--2743",
ISBN = "979-8-89176-414-9",
abstract = "Detecting equivocation is essential, as indirect or evasive responses can shape public perception, influence political narratives, and undermine transparency in democratic discourse. To address the challenge of detecting evasive political responses on digital platforms, participation in the CLARITY SemEval-2026 Task was undertaken, which focuses on (i) clarity-level classification and (ii) fine-grained evasion-type classification in political question-answer contexts. This study introduces a data-centric framework that systematically examines the effects of class distribution and refinement strategies on the performance of Large Language Models (LLMs). A distribution-aware, LLM-augmented dataset was constructed by selectively paraphrasing minority-class instances to enhance class balance, and its performance was benchmarked against full, rebalanced, and undersampled training configurations. To comprehensively assess the proposed method, Qwen3-14B, Phi-4, Gemma-2 9B, and Mistral 7B were evaluated in in-context learning (ICL) settings (zero-shot and few-shot) and with LoRA fine-tuning. Experimental results indicate that fine-tuning Phi-4 with class rebalancing yields strong performance, achieving 74.77{\%} on Subtask-1 and 51.55{\%} on Subtask-2. Consequently, the system ranked 21st in Subtask-1 and 22nd in Subtask-2 on the official evaluation leaderboard."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhattacharjee-etal-2026-argonauts">
<titleInfo>
<title>The Argonauts at SemEval 2026 Task 6: Large Language Models for Response Clarity Classification: Prompting, Fine-Tuning, and Data-Centric Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sajib</namePart>
<namePart type="family">Bhattacharjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sha</namePart>
<namePart type="given">Newaz</namePart>
<namePart type="family">Mahmud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Refaj</namePart>
<namePart type="family">Hossan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kawsar</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="given">Moshiul</namePart>
<namePart type="family">Hoque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>Detecting equivocation is essential, as indirect or evasive responses can shape public perception, influence political narratives, and undermine transparency in democratic discourse. To address the challenge of detecting evasive political responses on digital platforms, participation in the CLARITY SemEval-2026 Task was undertaken, which focuses on (i) clarity-level classification and (ii) fine-grained evasion-type classification in political question-answer contexts. This study introduces a data-centric framework that systematically examines the effects of class distribution and refinement strategies on the performance of Large Language Models (LLMs). A distribution-aware, LLM-augmented dataset was constructed by selectively paraphrasing minority-class instances to enhance class balance, and its performance was benchmarked against full, rebalanced, and undersampled training configurations. To comprehensively assess the proposed method, Qwen3-14B, Phi-4, Gemma-2 9B, and Mistral 7B were evaluated in in-context learning (ICL) settings (zero-shot and few-shot) and with LoRA fine-tuning. Experimental results indicate that fine-tuning Phi-4 with class rebalancing yields strong performance, achieving 74.77% on Subtask-1 and 51.55% on Subtask-2. Consequently, the system ranked 21st in Subtask-1 and 22nd in Subtask-2 on the official evaluation leaderboard.</abstract>
<identifier type="citekey">bhattacharjee-etal-2026-argonauts</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.344/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>2729</start>
<end>2743</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Argonauts at SemEval 2026 Task 6: Large Language Models for Response Clarity Classification: Prompting, Fine-Tuning, and Data-Centric Approaches
%A Bhattacharjee, Sajib
%A Mahmud, Sha Newaz
%A Hossan, Md. Refaj
%A Ahmed, Kawsar
%A Hoque, Mohammed Moshiul
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F bhattacharjee-etal-2026-argonauts
%X Detecting equivocation is essential, as indirect or evasive responses can shape public perception, influence political narratives, and undermine transparency in democratic discourse. To address the challenge of detecting evasive political responses on digital platforms, participation in the CLARITY SemEval-2026 Task was undertaken, which focuses on (i) clarity-level classification and (ii) fine-grained evasion-type classification in political question-answer contexts. This study introduces a data-centric framework that systematically examines the effects of class distribution and refinement strategies on the performance of Large Language Models (LLMs). A distribution-aware, LLM-augmented dataset was constructed by selectively paraphrasing minority-class instances to enhance class balance, and its performance was benchmarked against full, rebalanced, and undersampled training configurations. To comprehensively assess the proposed method, Qwen3-14B, Phi-4, Gemma-2 9B, and Mistral 7B were evaluated in in-context learning (ICL) settings (zero-shot and few-shot) and with LoRA fine-tuning. Experimental results indicate that fine-tuning Phi-4 with class rebalancing yields strong performance, achieving 74.77% on Subtask-1 and 51.55% on Subtask-2. Consequently, the system ranked 21st in Subtask-1 and 22nd in Subtask-2 on the official evaluation leaderboard.
%U https://aclanthology.org/2026.semeval-1.344/
%P 2729-2743
Markdown (Informal)
[The Argonauts at SemEval 2026 Task 6: Large Language Models for Response Clarity Classification: Prompting, Fine-Tuning, and Data-Centric Approaches](https://aclanthology.org/2026.semeval-1.344/) (Bhattacharjee et al., SemEval 2026)
ACL
- Sajib Bhattacharjee, Sha Newaz Mahmud, Md. Refaj Hossan, Kawsar Ahmed, and Mohammed Moshiul Hoque. 2026. The Argonauts at SemEval 2026 Task 6: Large Language Models for Response Clarity Classification: Prompting, Fine-Tuning, and Data-Centric Approaches. In Proceedings of the 20th International Workshop on Semantic Evaluation (2026), pages 2729–2743, San Diego, California, USA. Association for Computational Linguistics.