@inproceedings{wang-etal-2026-rv,
title = "{RV}-Syn: Rational and Verifiable Mathematical Reasoning Data Synthesis based on Structured Function Library",
author = "Wang, Jiapeng and
Jiang, Jinhao and
Zhang, Zhiqiang and
Zhou, Jun and
Zhao, Xin",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.93/",
pages = "1812--1827",
ISBN = "979-8-89176-386-9",
abstract = "The advancement of reasoning capabilities in Large Language Models (LLMs) requires substantial amounts of high-quality reasoning data, particularly in mathematics. Existing data synthesis methods, such as data augmentation from annotated training sets or direct question generation based on relevant knowledge points and documents, have expanded datasets but face challenges in mastering the internal logic of the problem during generation and ensuring the verifiability of the solutions. To address these issues, we propose \textbf{RV-Syn}, a novel Rational and Verifiable mathematical Synthesis approach. RV-Syn first constructs a structured library of mathematical operations and then composes them into executable computational graphs, which serve as verifiable solution blueprints. These graphs are subsequently back-translated into complex problems, enabling solution-guided, logic-aware problem generation while inherently ensuring the verifiability of the solving process. Experimental results show RV-Syn surpasses existing synthesis methods, including those involving human-crafted problems. Our method achieves a 6.3{\%} performance gain over the previous state-of-the-art synthetic data on LLaMA-3-8B and demonstrates superior data efficiency, outperforming others with only half the training data (50k vs. 100k), enabling a more scalable and robust reasoning dataset generation framework."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2026-rv">
<titleInfo>
<title>RV-Syn: Rational and Verifiable Mathematical Reasoning Data Synthesis based on Structured Function Library</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiapeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinhao</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>The advancement of reasoning capabilities in Large Language Models (LLMs) requires substantial amounts of high-quality reasoning data, particularly in mathematics. Existing data synthesis methods, such as data augmentation from annotated training sets or direct question generation based on relevant knowledge points and documents, have expanded datasets but face challenges in mastering the internal logic of the problem during generation and ensuring the verifiability of the solutions. To address these issues, we propose RV-Syn, a novel Rational and Verifiable mathematical Synthesis approach. RV-Syn first constructs a structured library of mathematical operations and then composes them into executable computational graphs, which serve as verifiable solution blueprints. These graphs are subsequently back-translated into complex problems, enabling solution-guided, logic-aware problem generation while inherently ensuring the verifiability of the solving process. Experimental results show RV-Syn surpasses existing synthesis methods, including those involving human-crafted problems. Our method achieves a 6.3% performance gain over the previous state-of-the-art synthetic data on LLaMA-3-8B and demonstrates superior data efficiency, outperforming others with only half the training data (50k vs. 100k), enabling a more scalable and robust reasoning dataset generation framework.</abstract>
<identifier type="citekey">wang-etal-2026-rv</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.93/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>1812</start>
<end>1827</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RV-Syn: Rational and Verifiable Mathematical Reasoning Data Synthesis based on Structured Function Library
%A Wang, Jiapeng
%A Jiang, Jinhao
%A Zhang, Zhiqiang
%A Zhou, Jun
%A Zhao, Xin
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F wang-etal-2026-rv
%X The advancement of reasoning capabilities in Large Language Models (LLMs) requires substantial amounts of high-quality reasoning data, particularly in mathematics. Existing data synthesis methods, such as data augmentation from annotated training sets or direct question generation based on relevant knowledge points and documents, have expanded datasets but face challenges in mastering the internal logic of the problem during generation and ensuring the verifiability of the solutions. To address these issues, we propose RV-Syn, a novel Rational and Verifiable mathematical Synthesis approach. RV-Syn first constructs a structured library of mathematical operations and then composes them into executable computational graphs, which serve as verifiable solution blueprints. These graphs are subsequently back-translated into complex problems, enabling solution-guided, logic-aware problem generation while inherently ensuring the verifiability of the solving process. Experimental results show RV-Syn surpasses existing synthesis methods, including those involving human-crafted problems. Our method achieves a 6.3% performance gain over the previous state-of-the-art synthetic data on LLaMA-3-8B and demonstrates superior data efficiency, outperforming others with only half the training data (50k vs. 100k), enabling a more scalable and robust reasoning dataset generation framework.
%U https://aclanthology.org/2026.findings-eacl.93/
%P 1812-1827
Markdown (Informal)
[RV-Syn: Rational and Verifiable Mathematical Reasoning Data Synthesis based on Structured Function Library](https://aclanthology.org/2026.findings-eacl.93/) (Wang et al., Findings 2026)
ACL