% ACL Anthology BibTeX record for the TTD-SQL paper (EMNLP 2025 Industry Track).
% The abstract is lightly copy-edited: the duplicated "(2) schema hallucinations"
% item is merged into one, and model names are spelled consistently.
@inproceedings{sharma-etal-2025-ttd,
  title     = {{TTD}-{SQL}: Tree-Guided Token Decoding for Efficient and Schema-Aware {SQL} Generation},
  author    = {Sharma, Chetan and
               Narayanam, Ramasuri and
               Pal, Soumyabrata and
               Yeturu, Kalidas and
               Saini, Shiv Kumar and
               Mukherjee, Koyel},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou (China)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-industry.90/},
  pages     = {1287--1298},
  isbn      = {979-8-89176-333-3},
  abstract  = {Natural language interfaces (NLIs) democratize data analytics by enabling non-technical users to query relational databases via Text-to-SQL systems. While large language models (LLMs) have achieved state-of-the-art accuracy on benchmarks like Spider and BIRD, two critical challenges persist for real-time deployment: (1) inference latency due to sequential autoregressive decoding (e.g., average inference latency on BIRD (Minidev) is 14.3 seconds per query for Qwen2.5-Coder-32B and 22.86 seconds for Llama-70B), and (2) schema hallucinations such as invalid column references like customer{\_}ids instead of cust{\_}id (e.g., Qwen2.5-Coder-32B Instruct generated ... COUNT(users.UserId) ... = users.Id ..., using users.Id correctly in JOIN but hallucinating users.UserId in COUNT). To address these, we propose Tree-Guided Token Decoding (TTD-SQL), a lightweight framework that integrates SQL grammar and database schema constraints into the decoding process without modifying the underlying LLM. TTD precomputes token-level decision trees over SQL keywords, table names, and column identifiers, enabling deterministic ``auto-fill'' transitions for uniquely determined tokens (e.g., ``Song{\_}'' {\textrightarrow} ``ID'') while retaining flexibility for unconstrained reasoning. Across five LLMs (CodeLlama, Phi-4, Qwen2.5, Granite, Llama-70B), TTD achieves up to 19.96{\%} token-rate speedups by eliminating redundant forward passes (e.g., CodeLlama: 8.97{\textrightarrow}10.76 tokens/s on Spider) and reduces schema hallucinations, with a +17.7{\%} gain in executable-SQL rates (e.g., CodeLlama on BIRD). By bridging rigid parser-based methods and flexible LLM generation, TTD offers a practical path toward reliable, high-performance SQL generation in both public benchmarks and enterprise settings.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for one conference paper: six authors, then the host proceedings
       (three editors, publisher, place, ISBN), then abstract, URL and page range.
       The abstract is lightly copy-edited: the duplicated "(2) schema hallucinations"
       item is merged into one, and model names are spelled consistently. -->
  <mods ID="sharma-etal-2025-ttd">
    <titleInfo>
      <title>TTD-SQL: Tree-Guided Token Decoding for Efficient and Schema-Aware SQL Generation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Chetan</namePart>
      <namePart type="family">Sharma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ramasuri</namePart>
      <namePart type="family">Narayanam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Soumyabrata</namePart>
      <namePart type="family">Pal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kalidas</namePart>
      <namePart type="family">Yeturu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shiv</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">Saini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Koyel</namePart>
      <namePart type="family">Mukherjee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Saloni</namePart>
        <namePart type="family">Potdar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lina</namePart>
        <namePart type="family">Rojas-Barahona</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sebastien</namePart>
        <namePart type="family">Montella</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou (China)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-333-3</identifier>
    </relatedItem>
    <abstract>Natural language interfaces (NLIs) democratize data analytics by enabling non-technical users to query relational databases via Text-to-SQL systems. While large language models (LLMs) have achieved state-of-the-art accuracy on benchmarks like Spider and BIRD, two critical challenges persist for real-time deployment: (1) inference latency due to sequential autoregressive decoding (e.g., average inference latency on BIRD (Minidev) is 14.3 seconds per query for Qwen2.5-Coder-32B and 22.86 seconds for Llama-70B), and (2) schema hallucinations such as invalid column references like customer_ids instead of cust_id (e.g., Qwen2.5-Coder-32B Instruct generated ... COUNT(users.UserId) ... = users.Id ..., using users.Id correctly in JOIN but hallucinating users.UserId in COUNT). To address these, we propose Tree-Guided Token Decoding (TTD-SQL), a lightweight framework that integrates SQL grammar and database schema constraints into the decoding process without modifying the underlying LLM. TTD precomputes token-level decision trees over SQL keywords, table names, and column identifiers, enabling deterministic “auto-fill” transitions for uniquely determined tokens (e.g., “Song_” → “ID”) while retaining flexibility for unconstrained reasoning. Across five LLMs (CodeLlama, Phi-4, Qwen2.5, Granite, Llama-70B), TTD achieves up to 19.96% token-rate speedups by eliminating redundant forward passes (e.g., CodeLlama: 8.97→10.76 tokens/s on Spider) and reduces schema hallucinations, with a +17.7% gain in executable-SQL rates (e.g., CodeLlama on BIRD). By bridging rigid parser-based methods and flexible LLM generation, TTD offers a practical path toward reliable, high-performance SQL generation in both public benchmarks and enterprise settings.</abstract>
    <identifier type="citekey">sharma-etal-2025-ttd</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-industry.90/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>1287</start>
        <end>1298</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T TTD-SQL: Tree-Guided Token Decoding for Efficient and Schema-Aware SQL Generation
%A Sharma, Chetan
%A Narayanam, Ramasuri
%A Pal, Soumyabrata
%A Yeturu, Kalidas
%A Saini, Shiv Kumar
%A Mukherjee, Koyel
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F sharma-etal-2025-ttd
%X Natural language interfaces (NLIs) democratize data analytics by enabling non-technical users to query relational databases via Text-to-SQL systems. While large language models (LLMs) have achieved state-of-the-art accuracy on benchmarks like Spider and BIRD, two critical challenges persist for real-time deployment: (1) inference latency due to sequential autoregressive decoding (e.g., average inference latency on BIRD (Minidev) is 14.3 seconds per query for Qwen2.5-Coder-32B and 22.86 seconds for Llama-70B), and (2) schema hallucinations such as invalid column references like customer_ids instead of cust_id (e.g., Qwen2.5-Coder-32B Instruct generated ... COUNT(users.UserId) ... = users.Id ..., using users.Id correctly in JOIN but hallucinating users.UserId in COUNT). To address these, we propose Tree-Guided Token Decoding (TTD-SQL), a lightweight framework that integrates SQL grammar and database schema constraints into the decoding process without modifying the underlying LLM. TTD precomputes token-level decision trees over SQL keywords, table names, and column identifiers, enabling deterministic “auto-fill” transitions for uniquely determined tokens (e.g., “Song_” → “ID”) while retaining flexibility for unconstrained reasoning. Across five LLMs (CodeLlama, Phi-4, Qwen2.5, Granite, Llama-70B), TTD achieves up to 19.96% token-rate speedups by eliminating redundant forward passes (e.g., CodeLlama: 8.97→10.76 tokens/s on Spider) and reduces schema hallucinations, with a +17.7% gain in executable-SQL rates (e.g., CodeLlama on BIRD). By bridging rigid parser-based methods and flexible LLM generation, TTD offers a practical path toward reliable, high-performance SQL generation in both public benchmarks and enterprise settings.
%U https://aclanthology.org/2025.emnlp-industry.90/
%P 1287-1298
Markdown (Informal)
[TTD-SQL: Tree-Guided Token Decoding for Efficient and Schema-Aware SQL Generation](https://aclanthology.org/2025.emnlp-industry.90/) (Sharma et al., EMNLP 2025)
ACL