@inproceedings{lu-etal-2025-unimath,
title = "{U}ni{M}ath-{C}o{T}: A Unified Framework for Multimodal Mathematical Reasoning with Re-Inference Affirmation",
author = "Lu, Zhixiang and
Zhou, Mian and
Stefanidis, Angelos and
Su, Jionglong",
editor = "Valentino, Marco and
Ferreira, Deborah and
Thayaparan, Mokanarangan and
Ranaldi, Leonardo and
Freitas, Andre",
booktitle = "Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.mathnlp-main.13/",
pages = "176--185",
    isbn = "979-8-89176-348-7",
abstract = "Large Language Models (LLMs) have achieved considerable success in text-based mathematical reasoning, yet their potential remains underexplored in the multimodal mathematics domain where joint text and image understanding is imperative. A key bottleneck hindering progress is the scarcity of high-quality, genuinely multimodal benchmarks. To address this gap, we construct a unified benchmark by consolidating and curating three public multimodal mathematics datasets. We subsequently propose the UniMath-CoT framework, which establishes a robust performance baseline by combining Chain-of-Thought (CoT) principles with efficient Supervised Fine-Tuning (SFT) based on Low-Rank Adaptation (LoRA). Furthermore, to bolster the model{'}s reasoning robustness, we introduce an innovative verification mechanism, AARI (Answer Affirmation by Re-Inference), which leverages a specialized re-inference protocol to have the model self-scrutinize and validate its initial conclusions. Our comprehensive experiments show that this integrated strategy substantially boosts performance, surpassing a wide range of open-source models and markedly closing the gap with leading proprietary systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2025-unimath">
<titleInfo>
<title>UniMath-CoT: A Unified Framework for Multimodal Mathematical Reasoning with Re-Inference Affirmation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhixiang</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mian</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angelos</namePart>
<namePart type="family">Stefanidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jionglong</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valentino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deborah</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mokanarangan</namePart>
<namePart type="family">Thayaparan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Ranaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-348-7</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have achieved considerable success in text-based mathematical reasoning, yet their potential remains underexplored in the multimodal mathematics domain where joint text and image understanding is imperative. A key bottleneck hindering progress is the scarcity of high-quality, genuinely multimodal benchmarks. To address this gap, we construct a unified benchmark by consolidating and curating three public multimodal mathematics datasets. We subsequently propose the UniMath-CoT framework, which establishes a robust performance baseline by combining Chain-of-Thought (CoT) principles with efficient Supervised Fine-Tuning (SFT) based on Low-Rank Adaptation (LoRA). Furthermore, to bolster the model’s reasoning robustness, we introduce an innovative verification mechanism, AARI (Answer Affirmation by Re-Inference), which leverages a specialized re-inference protocol to have the model self-scrutinize and validate its initial conclusions. Our comprehensive experiments show that this integrated strategy substantially boosts performance, surpassing a wide range of open-source models and markedly closing the gap with leading proprietary systems.</abstract>
<identifier type="citekey">lu-etal-2025-unimath</identifier>
<location>
<url>https://aclanthology.org/2025.mathnlp-main.13/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>176</start>
<end>185</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UniMath-CoT: A Unified Framework for Multimodal Mathematical Reasoning with Re-Inference Affirmation
%A Lu, Zhixiang
%A Zhou, Mian
%A Stefanidis, Angelos
%A Su, Jionglong
%Y Valentino, Marco
%Y Ferreira, Deborah
%Y Thayaparan, Mokanarangan
%Y Ranaldi, Leonardo
%Y Freitas, Andre
%S Proceedings of The 3rd Workshop on Mathematical Natural Language Processing (MathNLP 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-348-7
%F lu-etal-2025-unimath
%X Large Language Models (LLMs) have achieved considerable success in text-based mathematical reasoning, yet their potential remains underexplored in the multimodal mathematics domain where joint text and image understanding is imperative. A key bottleneck hindering progress is the scarcity of high-quality, genuinely multimodal benchmarks. To address this gap, we construct a unified benchmark by consolidating and curating three public multimodal mathematics datasets. We subsequently propose the UniMath-CoT framework, which establishes a robust performance baseline by combining Chain-of-Thought (CoT) principles with efficient Supervised Fine-Tuning (SFT) based on Low-Rank Adaptation (LoRA). Furthermore, to bolster the model’s reasoning robustness, we introduce an innovative verification mechanism, AARI (Answer Affirmation by Re-Inference), which leverages a specialized re-inference protocol to have the model self-scrutinize and validate its initial conclusions. Our comprehensive experiments show that this integrated strategy substantially boosts performance, surpassing a wide range of open-source models and markedly closing the gap with leading proprietary systems.
%U https://aclanthology.org/2025.mathnlp-main.13/
%P 176-185
Markdown (Informal)
[UniMath-CoT: A Unified Framework for Multimodal Mathematical Reasoning with Re-Inference Affirmation](https://aclanthology.org/2025.mathnlp-main.13/) (Lu et al., MathNLP 2025)
ACL