% BibTeX record for a conference paper (entry type @inproceedings).
% Acronyms that must keep their capitalisation under sentence-casing styles
% are brace-protected as whole words ({LLMs}, {SEM}).
% The address field carries the location exactly as given in the source record.
@inproceedings{gera-neal-2026-diagnosing,
    title     = {Diagnosing Generalization in Open-Source {LLMs} for Stance Detection},
    author    = {Gera, Parush and Neal, Tempestt},
    editor    = {Mohammad, Saif M. and Ousidhoum, Nedjma},
    booktitle = {Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.starsem-conference.21/},
    pages     = {317--330},
    isbn      = {979-8-89176-413-2},
    abstract  = {Stance detection identifies whether a text expresses support, opposition, or neutrality toward a target and is central to applications such as political analysis and misinformation monitoring. With the shift toward large language models (LLMs), stance classification increasingly relies on prompting and lightweight adaptation. Yet the generalization behavior of open-source LLMs across new targets and domains remains uneven. We conduct a large-scale diagnostic study of four open-source LLMs (3B{--}24B parameters), examining how model size, prompting strategies, and Low-Rank Adaptation (LoRA) interact across in-target, cross-target, and cross-domain settings. Across 912 experiments, three patterns emerge: (1) larger models improve prompting-based in-target performance, but this advantage diminishes after fine-tuning; (2) LoRA boosts in-target accuracy yet often harms cross-context transfer; (3) optimal prompting depends on model size. These results reveal a consistent tension between specialization and generalization, offering practical guidance for configuring LLM-based stance detection under transfer.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID is the same string as the citekey identifier below. -->
  <mods ID="gera-neal-2026-diagnosing">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Diagnosing Generalization in Open-Source LLMs for Stance Detection</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Parush</namePart>
      <namePart type="family">Gera</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tempestt</namePart>
      <namePart type="family">Neal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
      </titleInfo>
      <!-- Volume editors; the second given-name part of the first editor is a bare initial -->
      <name type="personal">
        <namePart type="given">Saif</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Mohammad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nedjma</namePart>
        <namePart type="family">Ousidhoum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-413-2</identifier>
    </relatedItem>
    <abstract>Stance detection identifies whether a text expresses support, opposition, or neutrality toward a target and is central to applications such as political analysis and misinformation monitoring. With the shift toward large language models (LLMs), stance classification increasingly relies on prompting and lightweight adaptation. Yet the generalization behavior of open-source LLMs across new targets and domains remains uneven. We conduct a large-scale diagnostic study of four open-source LLMs (3B–24B parameters), examining how model size, prompting strategies, and Low-Rank Adaptation (LoRA) interact across in-target, cross-target, and cross-domain settings. Across 912 experiments, three patterns emerge: (1) larger models improve prompting-based in-target performance, but this advantage diminishes after fine-tuning; (2) LoRA boosts in-target accuracy yet often harms cross-context transfer; (3) optimal prompting depends on model size. These results reveal a consistent tension between specialization and generalization, offering practical guidance for configuring LLM-based stance detection under transfer.</abstract>
    <identifier type="citekey">gera-neal-2026-diagnosing</identifier>
    <location>
      <url>https://aclanthology.org/2026.starsem-conference.21/</url>
    </location>
    <!-- Position of the paper inside the host volume: pages 317 to 330 -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>317</start>
        <end>330</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Diagnosing Generalization in Open-Source LLMs for Stance Detection
%A Gera, Parush
%A Neal, Tempestt
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F gera-neal-2026-diagnosing
%X Stance detection identifies whether a text expresses support, opposition, or neutrality toward a target and is central to applications such as political analysis and misinformation monitoring. With the shift toward large language models (LLMs), stance classification increasingly relies on prompting and lightweight adaptation. Yet the generalization behavior of open-source LLMs across new targets and domains remains uneven. We conduct a large-scale diagnostic study of four open-source LLMs (3B–24B parameters), examining how model size, prompting strategies, and Low-Rank Adaptation (LoRA) interact across in-target, cross-target, and cross-domain settings. Across 912 experiments, three patterns emerge: (1) larger models improve prompting-based in-target performance, but this advantage diminishes after fine-tuning; (2) LoRA boosts in-target accuracy yet often harms cross-context transfer; (3) optimal prompting depends on model size. These results reveal a consistent tension between specialization and generalization, offering practical guidance for configuring LLM-based stance detection under transfer.
%U https://aclanthology.org/2026.starsem-conference.21/
%P 317-330
Markdown (Informal)
[Diagnosing Generalization in Open-Source LLMs for Stance Detection](https://aclanthology.org/2026.starsem-conference.21/) (Gera & Neal, \*SEM 2026)
ACL