@inproceedings{nguyen-etal-2025-finstat2sql,
title = "{F}in{S}tat2{SQL}: A {T}ext2{SQL} Pipeline for Financial Statement Analysis",
author = "Nguyen, Hung Quang and
Trinh, Anh Phuong and
Mai, Hung Phan Quoc and
Trinh, Phong Tuan",
editor = "Flek, Lucie and
Narayan, Shashi and
Phương, L{\^e} Hồng and
Pei, Jiahuan",
booktitle = "Proceedings of the 18th International Natural Language Generation Conference",
month = oct,
year = "2025",
address = "Hanoi, Vietnam",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.inlg-main.27/",
pages = "449--464",
abstract = "Despite recent advances in LLMs, Text2SQL remains challenging for complex, domain-specific queries such as finance, where database designs and reporting standards vary widely. We introduce FinStat2SQL, a lightweight pipeline enabling natural language queries over financial statements, tailored to local standards like VAS. Our multi-agent setup combines large and small language models for entity extraction, SQL generation, and self-correction, and includes a fully automatic pipeline for synthetic data generation. Leveraging this synthetic data, we fine-tuned a 7B model that achieves 61.33{\%} accuracy with sub-4s latency on consumer hardware, outperforming GPT-4o-mini on SQL generation. FinStat2SQL provides a scalable, cost-efficient solution for financial analysis. We made our source code publicly available at: https://github.com/hung20gg/chatbot{\_}financial{\_}statement."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2025-finstat2sql">
<titleInfo>
<title>FinStat2SQL: A Text2SQL Pipeline for Financial Statement Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hung</namePart>
<namePart type="given">Quang</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anh</namePart>
<namePart type="given">Phuong</namePart>
<namePart type="family">Trinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung</namePart>
<namePart type="given">Phan</namePart>
<namePart type="given">Quoc</namePart>
<namePart type="family">Mai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phong</namePart>
<namePart type="given">Tuan</namePart>
<namePart type="family">Trinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucie</namePart>
<namePart type="family">Flek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shashi</namePart>
<namePart type="family">Narayan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lê</namePart>
<namePart type="given">Hồng</namePart>
<namePart type="family">Phương</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahuan</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hanoi, Vietnam</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite recent advances in LLMs, Text2SQL remains challenging for complex, domain-specific queries such as finance, where database designs and reporting standards vary widely. We introduce FinStat2SQL, a lightweight pipeline enabling natural language queries over financial statements, tailored to local standards like VAS. Our multi-agent setup combines large and small language models for entity extraction, SQL generation, and self-correction, and includes a fully automatic pipeline for synthetic data generation. Leveraging this synthetic data, we fine-tuned a 7B model that achieves 61.33% accuracy with sub-4s latency on consumer hardware, outperforming GPT-4o-mini on SQL generation. FinStat2SQL provides a scalable, cost-efficient solution for financial analysis. We made our source code publicly available at: https://github.com/hung20gg/chatbot_financial_statement.</abstract>
<identifier type="citekey">nguyen-etal-2025-finstat2sql</identifier>
<location>
<url>https://aclanthology.org/2025.inlg-main.27/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>449</start>
<end>464</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FinStat2SQL: A Text2SQL Pipeline for Financial Statement Analysis
%A Nguyen, Hung Quang
%A Trinh, Anh Phuong
%A Mai, Hung Phan Quoc
%A Trinh, Phong Tuan
%Y Flek, Lucie
%Y Narayan, Shashi
%Y Phương, Lê Hồng
%Y Pei, Jiahuan
%S Proceedings of the 18th International Natural Language Generation Conference
%D 2025
%8 October
%I Association for Computational Linguistics
%C Hanoi, Vietnam
%F nguyen-etal-2025-finstat2sql
%X Despite recent advances in LLMs, Text2SQL remains challenging for complex, domain-specific queries such as finance, where database designs and reporting standards vary widely. We introduce FinStat2SQL, a lightweight pipeline enabling natural language queries over financial statements, tailored to local standards like VAS. Our multi-agent setup combines large and small language models for entity extraction, SQL generation, and self-correction, and includes a fully automatic pipeline for synthetic data generation. Leveraging this synthetic data, we fine-tuned a 7B model that achieves 61.33% accuracy with sub-4s latency on consumer hardware, outperforming GPT-4o-mini on SQL generation. FinStat2SQL provides a scalable, cost-efficient solution for financial analysis. We made our source code publicly available at: https://github.com/hung20gg/chatbot_financial_statement.
%U https://aclanthology.org/2025.inlg-main.27/
%P 449-464
Markdown (Informal)
[FinStat2SQL: A Text2SQL Pipeline for Financial Statement Analysis](https://aclanthology.org/2025.inlg-main.27/) (Nguyen et al., INLG 2025)
ACL