@inproceedings{stoisser-etal-2025-sparks,
title = "Sparks of Tabular Reasoning via {T}ext2{SQL} Reinforcement Learning",
author = "Stoisser, Josefa Lia and
Martell, Marc Boubnovski and
Fauqueur, Julien",
editor = "Chang, Shuaichen and
Hulsebos, Madelon and
Liu, Qian and
Chen, Wenhu and
Sun, Huan",
booktitle = "Proceedings of the 4th Table Representation Learning Workshop",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.trl-1.20/",
doi = "10.18653/v1/2025.trl-1.20",
pages = "229--240",
ISBN = "979-8-89176-268-8",
abstract = "This work reframes the Text-to-SQL task as a pathway for teaching large language models (LLMs) to reason over and manipulate tabular data{---}moving beyond the traditional focus on query generation. We propose a two-stage framework that leverages SQL supervision to develop transferable table reasoning capabilities. First, we synthesize detailed chain-of-thought (CoT) traces from real-world SQL queries, providing step-by-step, clause-level supervision that teaches the model how to traverse, filter, and aggregate table fields. Second, we introduce a Group Relative Policy Optimization (GRPO) reinforcement learning objective that connects SQL execution accuracy to generalizable reasoning by encouraging steps that extend beyond task-specific syntax and transfer across datasets.Empirically, our approach improves performance on standard Text-to-SQL benchmarks and achieves substantial gains on reasoning-intensive datasets such as BIRD, CRT-QA and Tablebench, demonstrating enhanced generalization and interpretability. Specifically, the distilled-quantized LLaMA-8B model achieved a 34{\%} relative increase in exact match scores on CRT-QA when trained on Text-to-SQL tasks, while Qwen-2.5-7B achieved a 10{\%} and Qwen-2.5-14B a 6{\%} relative increase. These results suggest that SQL can serve not only as a target formalism but also as an effective scaffold for learning robust, transferable reasoning over structured data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="stoisser-etal-2025-sparks">
    <titleInfo>
      <title>Sparks of Tabular Reasoning via Text2SQL Reinforcement Learning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Josefa</namePart>
      <namePart type="given">Lia</namePart>
      <namePart type="family">Stoisser</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marc</namePart>
      <namePart type="given">Boubnovski</namePart>
      <namePart type="family">Martell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julien</namePart>
      <namePart type="family">Fauqueur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Table Representation Learning Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Shuaichen</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Madelon</namePart>
        <namePart type="family">Hulsebos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Qian</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wenhu</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Huan</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-268-8</identifier>
    </relatedItem>
    <abstract>This work reframes the Text-to-SQL task as a pathway for teaching large language models (LLMs) to reason over and manipulate tabular data—moving beyond the traditional focus on query generation. We propose a two-stage framework that leverages SQL supervision to develop transferable table reasoning capabilities. First, we synthesize detailed chain-of-thought (CoT) traces from real-world SQL queries, providing step-by-step, clause-level supervision that teaches the model how to traverse, filter, and aggregate table fields. Second, we introduce a Group Relative Policy Optimization (GRPO) reinforcement learning objective that connects SQL execution accuracy to generalizable reasoning by encouraging steps that extend beyond task-specific syntax and transfer across datasets. Empirically, our approach improves performance on standard Text-to-SQL benchmarks and achieves substantial gains on reasoning-intensive datasets such as BIRD, CRT-QA and Tablebench, demonstrating enhanced generalization and interpretability. Specifically, the distilled-quantized LLaMA-8B model achieved a 34% relative increase in exact match scores on CRT-QA when trained on Text-to-SQL tasks, while Qwen-2.5-7B achieved a 10% and Qwen-2.5-14B a 6% relative increase. These results suggest that SQL can serve not only as a target formalism but also as an effective scaffold for learning robust, transferable reasoning over structured data.</abstract>
<identifier type="citekey">stoisser-etal-2025-sparks</identifier>
<identifier type="doi">10.18653/v1/2025.trl-1.20</identifier>
<location>
<url>https://aclanthology.org/2025.trl-1.20/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>229</start>
<end>240</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sparks of Tabular Reasoning via Text2SQL Reinforcement Learning
%A Stoisser, Josefa Lia
%A Martell, Marc Boubnovski
%A Fauqueur, Julien
%Y Chang, Shuaichen
%Y Hulsebos, Madelon
%Y Liu, Qian
%Y Chen, Wenhu
%Y Sun, Huan
%S Proceedings of the 4th Table Representation Learning Workshop
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-268-8
%F stoisser-etal-2025-sparks
%X This work reframes the Text-to-SQL task as a pathway for teaching large language models (LLMs) to reason over and manipulate tabular data—moving beyond the traditional focus on query generation. We propose a two-stage framework that leverages SQL supervision to develop transferable table reasoning capabilities. First, we synthesize detailed chain-of-thought (CoT) traces from real-world SQL queries, providing step-by-step, clause-level supervision that teaches the model how to traverse, filter, and aggregate table fields. Second, we introduce a Group Relative Policy Optimization (GRPO) reinforcement learning objective that connects SQL execution accuracy to generalizable reasoning by encouraging steps that extend beyond task-specific syntax and transfer across datasets. Empirically, our approach improves performance on standard Text-to-SQL benchmarks and achieves substantial gains on reasoning-intensive datasets such as BIRD, CRT-QA and Tablebench, demonstrating enhanced generalization and interpretability. Specifically, the distilled-quantized LLaMA-8B model achieved a 34% relative increase in exact match scores on CRT-QA when trained on Text-to-SQL tasks, while Qwen-2.5-7B achieved a 10% and Qwen-2.5-14B a 6% relative increase. These results suggest that SQL can serve not only as a target formalism but also as an effective scaffold for learning robust, transferable reasoning over structured data.
%R 10.18653/v1/2025.trl-1.20
%U https://aclanthology.org/2025.trl-1.20/
%U https://doi.org/10.18653/v1/2025.trl-1.20
%P 229-240
Markdown (Informal)
[Sparks of Tabular Reasoning via Text2SQL Reinforcement Learning](https://aclanthology.org/2025.trl-1.20/) (Stoisser et al., TRL 2025)
ACL
Josefa Lia Stoisser, Marc Boubnovski Martell, and Julien Fauqueur. 2025. Sparks of Tabular Reasoning via Text2SQL Reinforcement Learning. In Proceedings of the 4th Table Representation Learning Workshop, pages 229–240, Vienna, Austria. Association for Computational Linguistics.
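As a rough, informal illustration of the execution-grounded reward mentioned in the abstract (the GRPO objective is tied to SQL execution accuracy), the sketch below scores a generated query by whether its result set matches that of the gold query on a SQLite database. The function name, the binary reward, and the choice of SQLite are assumptions made for illustration, not the authors' implementation.

```python
# Hypothetical sketch of an execution-match reward for GRPO-style training on
# Text-to-SQL (assumed SQLite backend; not the authors' code).
import sqlite3
from collections import Counter

def execution_match_reward(db_path: str, predicted_sql: str, gold_sql: str) -> float:
    """Return 1.0 if the predicted and gold queries yield the same multiset of rows."""
    try:
        with sqlite3.connect(db_path) as conn:
            pred_rows = conn.execute(predicted_sql).fetchall()
            gold_rows = conn.execute(gold_sql).fetchall()
    except sqlite3.Error:
        return 0.0  # non-executable SQL earns no reward
    # Order-insensitive comparison of result sets, a common Text-to-SQL convention.
    return 1.0 if Counter(pred_rows) == Counter(gold_rows) else 0.0
```

In a group-relative scheme such as GRPO, a reward of this kind would be computed for each sampled completion and the resulting advantages normalized within the group of samples drawn for the same question.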