@inproceedings{popescu-etal-2022-addressing,
title = "Addressing Limitations of Encoder-Decoder Based Approach to Text-to-{SQL}",
author = "Popescu, Octavian and
Manotas, Irene and
Vo, Ngoc Phuoc An and
Yeo, Hangu and
Khorashani, Elahe and
Sheinin, Vadim",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.137",
pages = "1593--1603",
abstract = "Most attempts at the Text-to-SQL task using an encoder-decoder approach suffer from a dramatic decline in performance for new databases. For the popular Spider dataset, despite models achieving 70{\%} accuracy on its development or test sets, the same models show a huge decline to below 20{\%} accuracy for unseen databases. The root causes of this problem are complex, and they cannot be easily fixed by adding more manually created training data. In this paper we address the problem and propose a solution: a hybrid system that uses an automated training-data augmentation technique. Our system consists of a rule-based component and a deep learning component that interact to understand crucial information in a given query and produce correct SQL as a result. It achieves a double-digit percentage improvement for databases that are not part of the Spider corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="popescu-etal-2022-addressing">
<titleInfo>
<title>Addressing Limitations of Encoder-Decoder Based Approach to Text-to-SQL</title>
</titleInfo>
<name type="personal">
<namePart type="given">Octavian</namePart>
<namePart type="family">Popescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irene</namePart>
<namePart type="family">Manotas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ngoc</namePart>
<namePart type="given">Phuoc</namePart>
<namePart type="given">An</namePart>
<namePart type="family">Vo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hangu</namePart>
<namePart type="family">Yeo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elahe</namePart>
<namePart type="family">Khorashani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vadim</namePart>
<namePart type="family">Sheinin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most attempts at the Text-to-SQL task using an encoder-decoder approach suffer from a dramatic decline in performance for new databases. For the popular Spider dataset, despite models achieving 70% accuracy on its development or test sets, the same models show a huge decline to below 20% accuracy for unseen databases. The root causes of this problem are complex, and they cannot be easily fixed by adding more manually created training data. In this paper we address the problem and propose a solution: a hybrid system that uses an automated training-data augmentation technique. Our system consists of a rule-based component and a deep learning component that interact to understand crucial information in a given query and produce correct SQL as a result. It achieves a double-digit percentage improvement for databases that are not part of the Spider corpus.</abstract>
<identifier type="citekey">popescu-etal-2022-addressing</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.137</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>1593</start>
<end>1603</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Addressing Limitations of Encoder-Decoder Based Approach to Text-to-SQL
%A Popescu, Octavian
%A Manotas, Irene
%A Vo, Ngoc Phuoc An
%A Yeo, Hangu
%A Khorashani, Elahe
%A Sheinin, Vadim
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F popescu-etal-2022-addressing
%X Most attempts at the Text-to-SQL task using an encoder-decoder approach suffer from a dramatic decline in performance for new databases. For the popular Spider dataset, despite models achieving 70% accuracy on its development or test sets, the same models show a huge decline to below 20% accuracy for unseen databases. The root causes of this problem are complex, and they cannot be easily fixed by adding more manually created training data. In this paper we address the problem and propose a solution: a hybrid system that uses an automated training-data augmentation technique. Our system consists of a rule-based component and a deep learning component that interact to understand crucial information in a given query and produce correct SQL as a result. It achieves a double-digit percentage improvement for databases that are not part of the Spider corpus.
%U https://aclanthology.org/2022.coling-1.137
%P 1593-1603
Markdown (Informal)
[Addressing Limitations of Encoder-Decoder Based Approach to Text-to-SQL](https://aclanthology.org/2022.coling-1.137) (Popescu et al., COLING 2022)
ACL
- Octavian Popescu, Irene Manotas, Ngoc Phuoc An Vo, Hangu Yeo, Elahe Khorashani, and Vadim Sheinin. 2022. Addressing Limitations of Encoder-Decoder Based Approach to Text-to-SQL. In Proceedings of the 29th International Conference on Computational Linguistics, pages 1593–1603, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.