@inproceedings{islam-erana-2026-cognac,
title = "{COGNAC} at {S}em{E}val-2026 Task 5: {LLM} Ensembles for Human-Level Word Sense Plausibility Rating in Challenging Narratives",
author = "Islam, Azwad Anjum and
Erana, Tisa Islam",
editor = "Kochmar, Ekaterina and
Ghosh, Debanjan and
North, Kai and
Komachi, Mamoru",
booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.semeval-1.414/",
pages = "3328--3336",
ISBN = "979-8-89176-414-9",
abstract = "We present a system for SemEval-2026 Task 5 that predicts 1{--}5 plausibility ratings for candidate senses of homonyms in ambiguous short stories using prompting with closed-source LLMs. We evaluate three prompting strategies: zero-shot, chain-of-thought, and comparative prompting that jointly scores competing senses. We also find simple unweighted ensembling aligns with subjective human judgments better than individual models. Our official submission ranked 4th on the leaderboard with an average score of 0.86, with post-competition experiments improving performance to 0.89."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="islam-erana-2026-cognac">
<titleInfo>
<title>COGNAC at SemEval-2026 Task 5: LLM Ensembles for Human-Level Word Sense Plausibility Rating in Challenging Narratives</title>
</titleInfo>
<name type="personal">
<namePart type="given">Azwad</namePart>
<namePart type="given">Anjum</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tisa</namePart>
<namePart type="given">Islam</namePart>
<namePart type="family">Erana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>We present a system for SemEval-2026 Task 5 that predicts 1–5 plausibility ratings for candidate senses of homonyms in ambiguous short stories using prompting with closed-source LLMs. We evaluate three prompting strategies: zero-shot, chain-of-thought, and comparative prompting that jointly scores competing senses. We also find simple unweighted ensembling aligns with subjective human judgments better than individual models. Our official submission ranked 4th on the leaderboard with an average score of 0.86, with post-competition experiments improving performance to 0.89.</abstract>
<identifier type="citekey">islam-erana-2026-cognac</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.414/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>3328</start>
<end>3336</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T COGNAC at SemEval-2026 Task 5: LLM Ensembles for Human-Level Word Sense Plausibility Rating in Challenging Narratives
%A Islam, Azwad Anjum
%A Erana, Tisa Islam
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F islam-erana-2026-cognac
%X We present a system for SemEval-2026 Task 5 that predicts 1–5 plausibility ratings for candidate senses of homonyms in ambiguous short stories using prompting with closed-source LLMs. We evaluate three prompting strategies: zero-shot, chain-of-thought, and comparative prompting that jointly scores competing senses. We also find simple unweighted ensembling aligns with subjective human judgments better than individual models. Our official submission ranked 4th on the leaderboard with an average score of 0.86, with post-competition experiments improving performance to 0.89.
%U https://aclanthology.org/2026.semeval-1.414/
%P 3328-3336
Markdown (Informal)
[COGNAC at SemEval-2026 Task 5: LLM Ensembles for Human-Level Word Sense Plausibility Rating in Challenging Narratives](https://aclanthology.org/2026.semeval-1.414/) (Islam & Erana, SemEval 2026)
ACL