@inproceedings{choo-hong-2026-modulating,
title = "Modulating Multi-Label Tendency in Zero-Shot {LLM} Coding: The Effect of Output Structure on {CDSS} Feedback Analysis",
author = "Choo, Hyunwoo and
Hong, Sungsoo",
editor = {Danilova, Vera and
Kurfal{\i}, Murathan and
S{\"o}derfeldt, Ylva and
Reed, Julia and
Burchell, Andrew},
booktitle = "Proceedings of the 1st Workshop on Linguistic Analysis for Health ({H}ea{L}ing 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.healing-1.14/",
pages = "172--179",
    isbn = "979-8-89176-367-8",
abstract = "Large language models (LLMs) often default to single-label classification in zero-shot multi-label tasks{---}a tendency we term ``conservative default''. While few-shot prompting mitigates this, it introduces ``example bias''. We evaluate zero-shot strategies to modulate this tendency using 1,441 healthcare feedback records and two LLMs. We compare instruction-based methods with structural constraints that modify the token generation sequence, specifically an Enum-First format requiring domain enumeration before selection. Results show that structural constraints substantially reduce single-label rates (Magistral: 96{\%} {\textrightarrow} 19{\%}; Qwen3: 54{\%} {\textrightarrow} 0.0{\%}), though the latter suggests potential over-correction compared to human baselines (16.7{--}41.3{\%}). These findings indicate that while output structure is a potent modulator of classification behavior by shifting the decision point upstream, its effect magnitude is model-dependent, necessitating empirical calibration to prevent spurious associations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choo-hong-2026-modulating">
<titleInfo>
<title>Modulating Multi-Label Tendency in Zero-Shot LLM Coding: The Effect of Output Structure on CDSS Feedback Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hyunwoo</namePart>
<namePart type="family">Choo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sungsoo</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Danilova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfalı</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ylva</namePart>
<namePart type="family">Söderfeldt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Reed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Burchell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-367-8</identifier>
</relatedItem>
<abstract>Large language models (LLMs) often default to single-label classification in zero-shot multi-label tasks—a tendency we term “conservative default”. While few-shot prompting mitigates this, it introduces “example bias”. We evaluate zero-shot strategies to modulate this tendency using 1,441 healthcare feedback records and two LLMs. We compare instruction-based methods with structural constraints that modify the token generation sequence, specifically an Enum-First format requiring domain enumeration before selection. Results show that structural constraints substantially reduce single-label rates (Magistral: 96% → 19%; Qwen3: 54% → 0.0%), though the latter suggests potential over-correction compared to human baselines (16.7–41.3%). These findings indicate that while output structure is a potent modulator of classification behavior by shifting the decision point upstream, its effect magnitude is model-dependent, necessitating empirical calibration to prevent spurious associations.</abstract>
<identifier type="citekey">choo-hong-2026-modulating</identifier>
<location>
<url>https://aclanthology.org/2026.healing-1.14/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>172</start>
<end>179</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modulating Multi-Label Tendency in Zero-Shot LLM Coding: The Effect of Output Structure on CDSS Feedback Analysis
%A Choo, Hyunwoo
%A Hong, Sungsoo
%Y Danilova, Vera
%Y Kurfalı, Murathan
%Y Söderfeldt, Ylva
%Y Reed, Julia
%Y Burchell, Andrew
%S Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-367-8
%F choo-hong-2026-modulating
%X Large language models (LLMs) often default to single-label classification in zero-shot multi-label tasks—a tendency we term “conservative default”. While few-shot prompting mitigates this, it introduces “example bias”. We evaluate zero-shot strategies to modulate this tendency using 1,441 healthcare feedback records and two LLMs. We compare instruction-based methods with structural constraints that modify the token generation sequence, specifically an Enum-First format requiring domain enumeration before selection. Results show that structural constraints substantially reduce single-label rates (Magistral: 96% → 19%; Qwen3: 54% → 0.0%), though the latter suggests potential over-correction compared to human baselines (16.7–41.3%). These findings indicate that while output structure is a potent modulator of classification behavior by shifting the decision point upstream, its effect magnitude is model-dependent, necessitating empirical calibration to prevent spurious associations.
%U https://aclanthology.org/2026.healing-1.14/
%P 172-179
Markdown (Informal)
[Modulating Multi-Label Tendency in Zero-Shot LLM Coding: The Effect of Output Structure on CDSS Feedback Analysis](https://aclanthology.org/2026.healing-1.14/) (Choo & Hong, HeaLing 2026)
ACL