% Conference-paper record (PsyDefDetect shared task, BioNLP 2026 proceedings).
% Braced words in title/booktitle are whole-word case protection: they keep
% product names and acronyms intact when a style applies sentence casing.
@inproceedings{fahim-etal-2026-perceptionlab,
  title     = {{PerceptionLab} at {PsyDefDetect}: Overcoming Extreme Response Bias in {LLMs} via Rubric-Grounded Retrieval and Supervised Clinical Reasoning Distillation for Fine-Grained Ordinal Classification},
  author    = {Fahim, Tamjid and
               Johan, Syed and
               Bin Maksud, Saad},
  editor    = {Gupta, Deepak and
               Demner-Fushman, Dina},
  booktitle = {Proceedings of the {BioNLP} 2026 (Shared Tasks)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.bionlp-2.17/},
  pages     = {109--122},
  isbn      = {979-8-89176-435-4},
  abstract  = {Automating the classification of psychological defense mechanisms is a critical yet challenging frontier in clinical natural language processing. General-purpose Large Language Models (LLMs) struggle to apply fine-grained ordinal frameworks like the Defense Mechanism Rating Scales due to the implicit nature of clinical cues and a fundamental clinical reasoning gap. These models exhibit severe extreme response bias, systematically gravitating toward the scale{'}s endpoints while failing to resolve nuanced, mid-level defenses. In this paper, we present our third-place system for the PsyDefDetect Shared Task at BioNLP 2026, designed specifically to overcome this failure mode. We propose a hybrid architecture that synergizes label-flattened generative retrieval with an LLM classifier fine-tuned via the distillation of supervised clinical reasoning traces. This dual approach, grounding decisions in rubric criteria while leveraging task-specific supervision, successfully mitigates the observed bias, achieving an accuracy of 67.37{\%} and a macro-F1 of 39.56{\%}. Our work provides empirical evidence that tightly integrating targeted clinical supervision with dynamic rubric-grounded retrieval significantly outperforms the raw parameter scale of un-tuned foundation models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey fahim-etal-2026-perceptionlab: three authors,
       hosted in a conference-publication volume with two editors. -->
  <mods ID="fahim-etal-2026-perceptionlab">
    <titleInfo>
      <title>PerceptionLab at PsyDefDetect: Overcoming Extreme Response Bias in LLMs via Rubric-Grounded Retrieval and Supervised Clinical Reasoning Distillation for Fine-Grained Ordinal Classification</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Tamjid</namePart>
      <namePart type="family">Fahim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Syed</namePart>
      <namePart type="family">Johan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saad</namePart>
      <namePart type="family">Bin Maksud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the BioNLP 2026 (Shared Tasks)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Deepak</namePart>
        <namePart type="family">Gupta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-435-4</identifier>
    </relatedItem>
    <abstract>Automating the classification of psychological defense mechanisms is a critical yet challenging frontier in clinical natural language processing. General-purpose Large Language Models (LLMs) struggle to apply fine-grained ordinal frameworks like the Defense Mechanism Rating Scales due to the implicit nature of clinical cues and a fundamental clinical reasoning gap. These models exhibit severe extreme response bias, systematically gravitating toward the scale’s endpoints while failing to resolve nuanced, mid-level defenses. In this paper, we present our third-place system for the PsyDefDetect Shared Task at BioNLP 2026, designed specifically to overcome this failure mode. We propose a hybrid architecture that synergizes label-flattened generative retrieval with an LLM classifier fine-tuned via the distillation of supervised clinical reasoning traces. This dual approach, grounding decisions in rubric criteria while leveraging task-specific supervision, successfully mitigates the observed bias, achieving an accuracy of 67.37% and a macro-F1 of 39.56%. Our work provides empirical evidence that tightly integrating targeted clinical supervision with dynamic rubric-grounded retrieval significantly outperforms the raw parameter scale of un-tuned foundation models.</abstract>
    <identifier type="citekey">fahim-etal-2026-perceptionlab</identifier>
    <location>
      <url>https://aclanthology.org/2026.bionlp-2.17/</url>
    </location>
    <!-- Page range of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>109</start>
        <end>122</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T PerceptionLab at PsyDefDetect: Overcoming Extreme Response Bias in LLMs via Rubric-Grounded Retrieval and Supervised Clinical Reasoning Distillation for Fine-Grained Ordinal Classification
%A Fahim, Tamjid
%A Johan, Syed
%A Bin Maksud, Saad
%Y Gupta, Deepak
%Y Demner-Fushman, Dina
%S Proceedings of the BioNLP 2026 (Shared Tasks)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-435-4
%F fahim-etal-2026-perceptionlab
%X Automating the classification of psychological defense mechanisms is a critical yet challenging frontier in clinical natural language processing. General-purpose Large Language Models (LLMs) struggle to apply fine-grained ordinal frameworks like the Defense Mechanism Rating Scales due to the implicit nature of clinical cues and a fundamental clinical reasoning gap. These models exhibit severe extreme response bias, systematically gravitating toward the scale’s endpoints while failing to resolve nuanced, mid-level defenses. In this paper, we present our third-place system for the PsyDefDetect Shared Task at BioNLP 2026, designed specifically to overcome this failure mode. We propose a hybrid architecture that synergizes label-flattened generative retrieval with an LLM classifier fine-tuned via the distillation of supervised clinical reasoning traces. This dual approach, grounding decisions in rubric criteria while leveraging task-specific supervision, successfully mitigates the observed bias, achieving an accuracy of 67.37% and a macro-F1 of 39.56%. Our work provides empirical evidence that tightly integrating targeted clinical supervision with dynamic rubric-grounded retrieval significantly outperforms the raw parameter scale of un-tuned foundation models.
%U https://aclanthology.org/2026.bionlp-2.17/
%P 109-122
Markdown (Informal)
[PerceptionLab at PsyDefDetect: Overcoming Extreme Response Bias in LLMs via Rubric-Grounded Retrieval and Supervised Clinical Reasoning Distillation for Fine-Grained Ordinal Classification](https://aclanthology.org/2026.bionlp-2.17/) (Fahim et al., BioNLP 2026)
ACL