@inproceedings{baruah-2026-abaruah-semeval,
title = "{ABARUAH} at {S}em{E}val-2026 Task 1: Leveraging High-Resolution {VLM}s and Reasoning {LLM}s for Multimodal Humor Generation",
author = "Baruah, Arup",
editor = "Kochmar, Ekaterina and
Ghosh, Debanjan and
North, Kai and
Komachi, Mamoru",
booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.semeval-1.436/",
pages = "3536--3543",
ISBN = "979-8-89176-414-9",
abstract = "This paper describes the systems developed for ``SemEval 2026 Task 1: Humor Generation''. This shared task covered both unimodal text constraints and multimodal GIF-based humor generation. The proposed approach used a two-stage pipeline consisting of a Multimodal Grounding stage to extract semantic descriptions from GIFs and a Humor Synthesis stage to generate the final humorous output. The Qwen2-VL and Qwen3-8B models were used for these respective stages. The system achieved competitive Elo-like ratings of 1009, 973, and 914 for Subtasks A, B1, and B2, respectively, demonstrating its ability to address diverse humorous constraints. The system was ranked 4th in overall standings for Subtasks A and B1."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="baruah-2026-abaruah-semeval">
<titleInfo>
<title>ABARUAH at SemEval-2026 Task 1: Leveraging High-Resolution VLMs and Reasoning LLMs for Multimodal Humor Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arup</namePart>
<namePart type="family">Baruah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>This paper describes the systems developed for “SemEval 2026 Task 1: Humor Generation”. This shared task covered both unimodal text constraints and multimodal GIF-based humor generation. The proposed approach used a two-stage pipeline consisting of a Multimodal Grounding stage to extract semantic descriptions from GIFs and a Humor Synthesis stage to generate the final humorous output. The Qwen2-VL and Qwen3-8B models were used for these respective stages. The system achieved competitive Elo-like ratings of 1009, 973, and 914 for Subtasks A, B1, and B2, respectively, demonstrating its ability to address diverse humorous constraints. The system was ranked 4th in overall standings for Subtasks A and B1.</abstract>
<identifier type="citekey">baruah-2026-abaruah-semeval</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.436/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>3536</start>
<end>3543</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ABARUAH at SemEval-2026 Task 1: Leveraging High-Resolution VLMs and Reasoning LLMs for Multimodal Humor Generation
%A Baruah, Arup
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F baruah-2026-abaruah-semeval
%X This paper describes the systems developed for “SemEval 2026 Task 1: Humor Generation”. This shared task covered both unimodal text constraints and multimodal GIF-based humor generation. The proposed approach used a two-stage pipeline consisting of a Multimodal Grounding stage to extract semantic descriptions from GIFs and a Humor Synthesis stage to generate the final humorous output. The Qwen2-VL and Qwen3-8B models were used for these respective stages. The system achieved competitive Elo-like ratings of 1009, 973, and 914 for Subtasks A, B1, and B2, respectively, demonstrating its ability to address diverse humorous constraints. The system was ranked 4th in overall standings for Subtasks A and B1.
%U https://aclanthology.org/2026.semeval-1.436/
%P 3536-3543
Markdown (Informal)
[ABARUAH at SemEval-2026 Task 1: Leveraging High-Resolution VLMs and Reasoning LLMs for Multimodal Humor Generation](https://aclanthology.org/2026.semeval-1.436/) (Baruah, SemEval 2026)
ACL