@inproceedings{lopez-ponce-etal-2026-gil,
title = "{GIL}-{Z}aragoza at {S}em{E}val 2026 Task 11: Comparing Classification, Autoformalization, and Ontologies for Formal Reasoning Capabilities",
author = "Lopez-Ponce, Francisco and
Pitarch, Lucia and
Saavedra Mart{\'i}nez, Iv{\'a}n and
Huitzil, Ignacio and
Ojeda Trueba, Sergio and
Bobillo, Fernando and
Bel-Enguix, Gemma",
editor = "Kochmar, Ekaterina and
Ghosh, Debanjan and
North, Kai and
Komachi, Mamoru",
booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.semeval-1.308/",
pages = "2438--2446",
ISBN = "979-8-89176-414-9",
abstract = "This paper describes our participation in Task 11 of SemEval-2026, which evaluates the ability of models to determine logical validity of syllogisms independent of real-world content. We develop and compare three approaches for Subtask 1: (1) an encoder-based classification baseline using both classical ML methods and fine-tuned BERT with debiasing strategies; (2) an autoformalization pipeline combining DPO-aligned models with first order logic translation and formal inference via Prover9; and (3) a hybrid neuro-symbolic approach using GPT to generate OWL 2 ontologies evaluated with the HermiT reasoner. Our best result was achieved by the encoder-based classifier, obtaining a 72.25{\%} accuracy and a combined score of 20.37, placing 40th out of 45 participating teams. Analysis shows that classification methods exhibit lower content bias, autoformalization approaches suffer from translation inconsistencies and syntax incompatibilities, and ontology-based reasoning is hindered by prompt design limitations and verbose serialization formats. All our code can be found in the paper{'}s repository."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lopez-ponce-etal-2026-gil">
<titleInfo>
<title>GIL-Zaragoza at SemEval 2026 Task 11: Comparing Classification, Autoformalization, and Ontologies for Formal Reasoning Capabilities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Lopez-Ponce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Pitarch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iván</namePart>
<namePart type="family">Saavedra Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ignacio</namePart>
<namePart type="family">Huitzil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Ojeda Trueba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Bobillo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Bel-Enguix</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>This paper describes our participation in Task 11 of SemEval-2026, which evaluates the ability of models to determine logical validity of syllogisms independent of real-world content. We develop and compare three approaches for Subtask 1: (1) an encoder-based classification baseline using both classical ML methods and fine-tuned BERT with debiasing strategies; (2) an autoformalization pipeline combining DPO-aligned models with first order logic translation and formal inference via Prover9; and (3) a hybrid neuro-symbolic approach using GPT to generate OWL 2 ontologies evaluated with the HermiT reasoner. Our best result was achieved by the encoder-based classifier, obtaining a 72.25% accuracy and a combined score of 20.37, placing 40th out of 45 participating teams. Analysis shows that classification methods exhibit lower content bias, autoformalization approaches suffer from translation inconsistencies and syntax incompatibilities, and ontology-based reasoning is hindered by prompt design limitations and verbose serialization formats. All our code can be found in the paper’s repository.</abstract>
<identifier type="citekey">lopez-ponce-etal-2026-gil</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.308/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>2438</start>
<end>2446</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GIL-Zaragoza at SemEval 2026 Task 11: Comparing Classification, Autoformalization, and Ontologies for Formal Reasoning Capabilities
%A Lopez-Ponce, Francisco
%A Pitarch, Lucia
%A Saavedra Martínez, Iván
%A Huitzil, Ignacio
%A Ojeda Trueba, Sergio
%A Bobillo, Fernando
%A Bel-Enguix, Gemma
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F lopez-ponce-etal-2026-gil
%X This paper describes our participation in Task 11 of SemEval-2026, which evaluates the ability of models to determine logical validity of syllogisms independent of real-world content. We develop and compare three approaches for Subtask 1: (1) an encoder-based classification baseline using both classical ML methods and fine-tuned BERT with debiasing strategies; (2) an autoformalization pipeline combining DPO-aligned models with first order logic translation and formal inference via Prover9; and (3) a hybrid neuro-symbolic approach using GPT to generate OWL 2 ontologies evaluated with the HermiT reasoner. Our best result was achieved by the encoder-based classifier, obtaining a 72.25% accuracy and a combined score of 20.37, placing 40th out of 45 participating teams. Analysis shows that classification methods exhibit lower content bias, autoformalization approaches suffer from translation inconsistencies and syntax incompatibilities, and ontology-based reasoning is hindered by prompt design limitations and verbose serialization formats. All our code can be found in the paper’s repository.
%U https://aclanthology.org/2026.semeval-1.308/
%P 2438-2446
Markdown (Informal)
[GIL-Zaragoza at SemEval 2026 Task 11: Comparing Classification, Autoformalization, and Ontologies for Formal Reasoning Capabilities](https://aclanthology.org/2026.semeval-1.308/) (Lopez-Ponce et al., SemEval 2026)
ACL
- Francisco Lopez-Ponce, Lucia Pitarch, Iván Saavedra Martínez, Ignacio Huitzil, Sergio Ojeda Trueba, Fernando Bobillo, and Gemma Bel-Enguix. 2026. GIL-Zaragoza at SemEval 2026 Task 11: Comparing Classification, Autoformalization, and Ontologies for Formal Reasoning Capabilities. In Proceedings of the 20th International Workshop on Semantic Evaluation (2026), pages 2438–2446, San Diego, California, USA. Association for Computational Linguistics.