% ACL Anthology record for the SemEval-2026 Task 12 system paper by Zou, Yang, and Zeng.
% Text outside an entry is ignored by BibTeX, so these two lines act as comments.
@inproceedings{zou-etal-2026-sutta,
  title     = {sutta at {SemEval}-2026 Task 12: A Multi-Perspective Retrieve-Verify-Aggregate Framework for Abductive Event Reasoning},
  author    = {Zou, Junliu and
               Yang, Liang and
               Zeng, Jingjie},
  editor    = {Kochmar, Ekaterina and
               Ghosh, Debanjan and
               North, Kai and
               Komachi, Mamoru},
  booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.semeval-1.323/},
  pages     = {2564--2574},
  isbn      = {979-8-89176-414-9},
  abstract  = {We present our system for SemEval-2026 Task 12: Abductive Event Reasoning (AER). The task asks models to identify the direct causes of real-world events from multiple-choice options using retrieved documents. Rather than fine-tuning on the training data, we built a zero-shot ``Retrieve-Verify-Aggregate'' pipeline around Qwen3-8B. We first isolate relevant evidence using BM25 and cross-encoder reranking. To evaluate causal links, we prompt the model with several distinct ``personas'' and aggregate their independent decisions through majority voting. Our system scored 0.7614 on the official test set. This performance suggests that strict retrieval combined with diverse reasoning prompts can help compact open-source models ignore irrelevant context and perform complex causal inference, entirely without task-specific training.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citekey zou-etal-2026-sutta: paper-level metadata first, then the host proceedings. -->
  <mods ID="zou-etal-2026-sutta">
    <titleInfo>
      <title>sutta at SemEval-2026 Task 12: A Multi-Perspective Retrieve-Verify-Aggregate Framework for Abductive Event Reasoning</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Junliu</namePart>
      <namePart type="family">Zou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Liang</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jingjie</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place, and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>We present our system for SemEval-2026 Task 12: Abductive Event Reasoning (AER). The task asks models to identify the direct causes of real-world events from multiple-choice options using retrieved documents. Rather than fine-tuning on the training data, we built a zero-shot “Retrieve-Verify-Aggregate” pipeline around Qwen3-8B. We first isolate relevant evidence using BM25 and cross-encoder reranking. To evaluate causal links, we prompt the model with several distinct “personas” and aggregate their independent decisions through majority voting. Our system scored 0.7614 on the official test set. This performance suggests that strict retrieval combined with diverse reasoning prompts can help compact open-source models ignore irrelevant context and perform complex causal inference, entirely without task-specific training.</abstract>
    <identifier type="citekey">zou-etal-2026-sutta</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.323/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>2564</start>
        <end>2574</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T sutta at SemEval-2026 Task 12: A Multi-Perspective Retrieve-Verify-Aggregate Framework for Abductive Event Reasoning
%A Zou, Junliu
%A Yang, Liang
%A Zeng, Jingjie
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F zou-etal-2026-sutta
%X We present our system for SemEval-2026 Task 12: Abductive Event Reasoning (AER). The task asks models to identify the direct causes of real-world events from multiple-choice options using retrieved documents. Rather than fine-tuning on the training data, we built a zero-shot “Retrieve-Verify-Aggregate” pipeline around Qwen3-8B. We first isolate relevant evidence using BM25 and cross-encoder reranking. To evaluate causal links, we prompt the model with several distinct “personas” and aggregate their independent decisions through majority voting. Our system scored 0.7614 on the official test set. This performance suggests that strict retrieval combined with diverse reasoning prompts can help compact open-source models ignore irrelevant context and perform complex causal inference, entirely without task-specific training.
%U https://aclanthology.org/2026.semeval-1.323/
%P 2564-2574
Markdown (Informal)
[sutta at SemEval-2026 Task 12: A Multi-Perspective Retrieve-Verify-Aggregate Framework for Abductive Event Reasoning](https://aclanthology.org/2026.semeval-1.323/) (Zou et al., SemEval 2026)
ACL