% CascadeMind system paper, SemEval-2026 Task 4 (record exported from the ACL Anthology).
% Case-sensitive words in title/booktitle are protected with whole-word braces.
@inproceedings{kawada-holyoak-2026-cascademind,
  title     = {{CascadeMind} at {SemEval}-2026 Task 4: A Hybrid Neuro-Symbolic Cascade for Narrative Similarity},
  author    = {Kawada, Sebastien and
               Holyoak, Dylan},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.semeval-1.204/},
  pages     = {1575--1581},
  isbn      = {979-8-89176-414-9},
  abstract  = {Across self-consistency samples from an LLM, vote agreement tracks instance difficulty: on SemEval-2026 Task 4 (Narrative Story Similarity), supermajority cases ({\ensuremath{\geq}} 7/8 votes) resolve at 85{\%} accuracy, split votes at 67{\%}, and perfect ties at 61{\%}, a monotone gradient that holds across the development set. We exploit this in CascadeMind, which routes eight Gemini 2.5 Flash votes by consensus, escalates split votes to additional sampling rounds, and falls through to a symbolic ensemble of theory-inspired narrative signals only on perfect ties (5{\%} of cases). The system reached 72.75{\%} on Track A test, placing 10th of 44 teams. Ablations show that the symbolic component contributes negligibly end-to-end and that nearly all gains come from confidence-aware routing. The takeaway is methodological: for narrative similarity, calibrating when to spend more compute on a hard instance matters more than adding auxiliary representations to reason about it. Code is available at https://github.com/chreia/CascadeMind-ACL.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kawada-holyoak-2026-cascademind">
<titleInfo>
<title>CascadeMind at SemEval-2026 Task 4: A Hybrid Neuro-Symbolic Cascade for Narrative Similarity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Kawada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dylan</namePart>
<namePart type="family">Holyoak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>Across self-consistency samples from an LLM, vote agreement tracks instance difficulty: on SemEval-2026 Task 4 (Narrative Story Similarity), supermajority cases (≥ 7/8 votes) resolve at 85% accuracy, split votes at 67%, and perfect ties at 61%, a monotone gradient that holds across the development set. We exploit this in CascadeMind, which routes eight Gemini 2.5 Flash votes by consensus, escalates split votes to additional sampling rounds, and falls through to a symbolic ensemble of theory-inspired narrative signals only on perfect ties (5% of cases). The system reached 72.75% on Track A test, placing 10th of 44 teams. Ablations show that the symbolic component contributes negligibly end-to-end and that nearly all gains come from confidence-aware routing. The takeaway is methodological: for narrative similarity, calibrating when to spend more compute on a hard instance matters more than adding auxiliary representations to reason about it. Code is available at https://github.com/chreia/CascadeMind-ACL.</abstract>
<identifier type="citekey">kawada-holyoak-2026-cascademind</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.204/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1575</start>
<end>1581</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CascadeMind at SemEval-2026 Task 4: A Hybrid Neuro-Symbolic Cascade for Narrative Similarity
%A Kawada, Sebastien
%A Holyoak, Dylan
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F kawada-holyoak-2026-cascademind
%X Across self-consistency samples from an LLM, vote agreement tracks instance difficulty: on SemEval-2026 Task 4 (Narrative Story Similarity), supermajority cases (≥ 7/8 votes) resolve at 85% accuracy, split votes at 67%, and perfect ties at 61%, a monotone gradient that holds across the development set. We exploit this in CascadeMind, which routes eight Gemini 2.5 Flash votes by consensus, escalates split votes to additional sampling rounds, and falls through to a symbolic ensemble of theory-inspired narrative signals only on perfect ties (5% of cases). The system reached 72.75% on Track A test, placing 10th of 44 teams. Ablations show that the symbolic component contributes negligibly end-to-end and that nearly all gains come from confidence-aware routing. The takeaway is methodological: for narrative similarity, calibrating when to spend more compute on a hard instance matters more than adding auxiliary representations to reason about it. Code is available at https://github.com/chreia/CascadeMind-ACL.
%U https://aclanthology.org/2026.semeval-1.204/
%P 1575-1581
Markdown (Informal)
[CascadeMind at SemEval-2026 Task 4: A Hybrid Neuro-Symbolic Cascade for Narrative Similarity](https://aclanthology.org/2026.semeval-1.204/) (Kawada & Holyoak, SemEval 2026)
ACL