@inproceedings{li-etal-2023-exploring,
title = "Exploring Schema Generalizability of Text-to-{SQL}",
author = "Li, Jieyu and
Chen, Lu and
Cao, Ruisheng and
Zhu, Su and
Xu, Hongshen and
Chen, Zhi and
Zhang, Hanchong and
Yu, Kai",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.87",
doi = "10.18653/v1/2023.findings-acl.87",
pages = "1344--1360",
abstract = "Exploring the generalizability of a text-to-SQL parser is essential for a system to automatically adapt the real-world databases. Previous investigation works mostly focus on lexical diversity, including the influence of the synonym and perturbations in both natural language questions and databases. However, the structural variability of database schema (DS), as a widely seen real-world scenario, is yet underexplored. Specifically, confronted with the same input question, the target SQL may be represented in different ways when the DS comes to a different structure. In this work, we provide in-depth discussions about the schema generalizability challenge of text-to-SQL tasks. We observe that current datasets are too templated to study schema generalization. To collect suitable test data, we propose a framework to generate novel text-to-SQL data via automatic and synchronous (DS, SQL) pair altering. When evaluating state-of-the-art text-to-SQL models on the synthetic samples, performance is significantly degraded, which demonstrates the limitation of current research regarding schema generalization.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2023-exploring">
<titleInfo>
<title>Exploring Schema Generalizability of Text-to-SQL</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jieyu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruisheng</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Su</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongshen</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanchong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Exploring the generalizability of a text-to-SQL parser is essential for a system to automatically adapt the real-world databases. Previous investigation works mostly focus on lexical diversity, including the influence of the synonym and perturbations in both natural language questions and databases. However, the structural variability of database schema (DS), as a widely seen real-world scenario, is yet underexplored. Specifically, confronted with the same input question, the target SQL may be represented in different ways when the DS comes to a different structure. In this work, we provide in-depth discussions about the schema generalizability challenge of text-to-SQL tasks. We observe that current datasets are too templated to study schema generalization. To collect suitable test data, we propose a framework to generate novel text-to-SQL data via automatic and synchronous (DS, SQL) pair altering. When evaluating state-of-the-art text-to-SQL models on the synthetic samples, performance is significantly degraded, which demonstrates the limitation of current research regarding schema generalization.</abstract>
<identifier type="citekey">li-etal-2023-exploring</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.87</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.87</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1344</start>
<end>1360</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Schema Generalizability of Text-to-SQL
%A Li, Jieyu
%A Chen, Lu
%A Cao, Ruisheng
%A Zhu, Su
%A Xu, Hongshen
%A Chen, Zhi
%A Zhang, Hanchong
%A Yu, Kai
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F li-etal-2023-exploring
%X Exploring the generalizability of a text-to-SQL parser is essential for a system to automatically adapt the real-world databases. Previous investigation works mostly focus on lexical diversity, including the influence of the synonym and perturbations in both natural language questions and databases. However, the structural variability of database schema (DS), as a widely seen real-world scenario, is yet underexplored. Specifically, confronted with the same input question, the target SQL may be represented in different ways when the DS comes to a different structure. In this work, we provide in-depth discussions about the schema generalizability challenge of text-to-SQL tasks. We observe that current datasets are too templated to study schema generalization. To collect suitable test data, we propose a framework to generate novel text-to-SQL data via automatic and synchronous (DS, SQL) pair altering. When evaluating state-of-the-art text-to-SQL models on the synthetic samples, performance is significantly degraded, which demonstrates the limitation of current research regarding schema generalization.
%R 10.18653/v1/2023.findings-acl.87
%U https://aclanthology.org/2023.findings-acl.87
%U https://doi.org/10.18653/v1/2023.findings-acl.87
%P 1344-1360
Markdown (Informal)
[Exploring Schema Generalizability of Text-to-SQL](https://aclanthology.org/2023.findings-acl.87) (Li et al., Findings 2023)
ACL
- Jieyu Li, Lu Chen, Ruisheng Cao, Su Zhu, Hongshen Xu, Zhi Chen, Hanchong Zhang, and Kai Yu. 2023. Exploring Schema Generalizability of Text-to-SQL. In Findings of the Association for Computational Linguistics: ACL 2023, pages 1344–1360, Toronto, Canada. Association for Computational Linguistics.