% Conference paper in the SemEval-2026 workshop proceedings (Task 11 system description).
% Case-sensitive words in title/booktitle are protected with whole-word braces so that
% sentence-casing bibliography styles keep their capitalisation.
% NOTE(review): `address` holds the location given by the source record; it looks like the
% conference venue rather than the publisher's city -- confirm before reusing with strict styles.
@inproceedings{chowdhury-etal-2026-abstractreasoner,
    title     = {{AbstractReasoner} at {SemEval-2026} Task 11: Reducing Content Effects via Knowledge Distillation and Structured Reasoning Prompts},
    author    = {Chowdhury, Akash and
                 Pavlovich, Vlad and
                 Dunfoy, Julius and
                 Yang, Sophia and
                 Borra, Abhiram},
    editor    = {Kochmar, Ekaterina and
                 Ghosh, Debanjan and
                 North, Kai and
                 Komachi, Mamoru},
    booktitle = {Proceedings of the 20th {International} {Workshop} on {Semantic} {Evaluation} (2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.semeval-1.315/},
    pages     = {2495--2505},
    isbn      = {979-8-89176-414-9},
    abstract  = {Syllogistic reasoning serves as a critical diagnostic for evaluating whether Large Language Models (LLMs) perform genuine logical inference or rely on semantic shortcuts. SemEval-2026 task 11 explores ``content effects''{---}where model judgments are biased by world knowledge rather than logical form. Recent work has illustrated that LLM optimization techniques have provided substantial performance gains in mitigating content effect. To contribute to this research domain, this paper performs a systematic study of different intervention strategies: zero-shot chain of thought, symbolic representation, activation-steering, and supervised fine-tuning along with prompting optimization during inference. We achieved the best performance with our largest model (Phi-4 14B) by fine-tuning with chain of thought distillation, symbolic abstractions and LLM as optimizer prompting (FTOptim) evaluated on the held-out split derived from the training data. This approach achieved the highest Combined Smooth Score (CSS) of 31.16. Additionally, Llama 3.1 provided noteworthy performance with 31.01 CSS under the same FTOptim approach, indicating the performance gain was LLM-agnostic.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (citekey chowdhury-etal-2026-abstractreasoner):
     the paper's own title, authors, abstract and page range, plus a relatedItem of type "host"
     describing the proceedings volume it appears in (editors, publisher, place, ISBN). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chowdhury-etal-2026-abstractreasoner">
    <!-- Paper title -->
    <titleInfo>
      <title>AbstractReasoner at SemEval-2026 Task 11: Reducing Content Effects via Knowledge Distillation and Structured Reasoning Prompts</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Akash</namePart>
      <namePart type="family">Chowdhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vlad</namePart>
      <namePart type="family">Pavlovich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julius</namePart>
      <namePart type="family">Dunfoy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sophia</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abhiram</namePart>
      <namePart type="family">Borra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debanjan</namePart>
        <namePart type="family">Ghosh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mamoru</namePart>
        <namePart type="family">Komachi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-414-9</identifier>
    </relatedItem>
    <abstract>Syllogistic reasoning serves as a critical diagnostic for evaluating whether Large Language Models (LLMs) perform genuine logical inference or rely on semantic shortcuts. SemEval-2026 task 11 explores “content effects”—where model judgments are biased by world knowledge rather than logical form. Recent work has illustrated that LLM optimization techniques have provided substantial performance gains in mitigating content effect. To contribute to this research domain, this paper performs a systematic study of different intervention strategies: zero-shot chain of thought, symbolic representation, activation-steering, and supervised fine-tuning along with prompting optimization during inference. We achieved the best performance with our largest model (Phi-4 14B) by fine-tuning with chain of thought distillation, symbolic abstractions and LLM as optimizer prompting (FTOptim) evaluated on the held-out split derived from the training data. This approach achieved the highest Combined Smooth Score (CSS) of 31.16. Additionally, Llama 3.1 provided noteworthy performance with 31.01 CSS under the same FTOptim approach, indicating the performance gain was LLM-agnostic.</abstract>
    <identifier type="citekey">chowdhury-etal-2026-abstractreasoner</identifier>
    <location>
      <url>https://aclanthology.org/2026.semeval-1.315/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>2495</start>
        <end>2505</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T AbstractReasoner at SemEval-2026 Task 11: Reducing Content Effects via Knowledge Distillation and Structured Reasoning Prompts
%A Chowdhury, Akash
%A Pavlovich, Vlad
%A Dunfoy, Julius
%A Yang, Sophia
%A Borra, Abhiram
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F chowdhury-etal-2026-abstractreasoner
%X Syllogistic reasoning serves as a critical diagnostic for evaluating whether Large Language Models (LLMs) perform genuine logical inference or rely on semantic shortcuts. SemEval-2026 task 11 explores “content effects”—where model judgments are biased by world knowledge rather than logical form. Recent work has illustrated that LLM optimization techniques have provided substantial performance gains in mitigating content effect. To contribute to this research domain, this paper performs a systematic study of different intervention strategies: zero-shot chain of thought, symbolic representation, activation-steering, and supervised fine-tuning along with prompting optimization during inference. We achieved the best performance with our largest model (Phi-4 14B) by fine-tuning with chain of thought distillation, symbolic abstractions and LLM as optimizer prompting (FTOptim) evaluated on the held-out split derived from the training data. This approach achieved the highest Combined Smooth Score (CSS) of 31.16. Additionally, Llama 3.1 provided noteworthy performance with 31.01 CSS under the same FTOptim approach, indicating the performance gain was LLM-agnostic.
%U https://aclanthology.org/2026.semeval-1.315/
%P 2495-2505
Markdown (Informal)
[AbstractReasoner at SemEval-2026 Task 11: Reducing Content Effects via Knowledge Distillation and Structured Reasoning Prompts](https://aclanthology.org/2026.semeval-1.315/) (Chowdhury et al., SemEval 2026)
ACL