@inproceedings{cai-etal-2022-star,
title = "{STAR}: {SQL} Guided Pre-Training for Context-dependent Text-to-{SQL} Parsing",
author = "Cai, Zefeng and
Li, Xiangyu and
Hui, Binyuan and
Yang, Min and
Li, Bowen and
Li, Binhua and
Cao, Zheng and
Li, Weijie and
Huang, Fei and
Si, Luo and
Li, Yongbin",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.89",
doi = "10.18653/v1/2022.findings-emnlp.89",
pages = "1235--1247",
abstract = "In this paper, we propose a novel SQL guided pre-training framework STAR for context-dependent text-to-SQL parsing, which leverages contextual information to enrich natural language (NL) utterance and table schema representations for text-to-SQL conversations. Concretely, we propose two novel pre-training objectives which respectively explore the context-dependent interactions of NL utterances and SQL queries within each text-to-SQL conversation: (i) schema state tracking (SST) objective that tracks and explores the schema states of context-dependent SQL queries in the form of schema-states by predicting and updating the value of each schema slot during interaction; (ii) utterance dependency tracking (UDT) objective that employs weighted contrastive learning to pull together two semantically similar NL utterances and push away the representations of semantically dissimilar NL utterances within each conversation. In addition, we construct a high-quality large-scale context-dependent text-to-SQL conversation corpus to pre-train STAR. Extensive experiments show that STAR achieves new state-of-the-art performance on two downstream benchmarks (SParC and CoSQL), significantly outperforming previous pre-training methods and ranking first on the leaderboard. We believe the release of the constructed corpus, codebase and pre-trained STAR checkpoints would push forward the research in this area.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cai-etal-2022-star">
<titleInfo>
<title>STAR: SQL Guided Pre-Training for Context-dependent Text-to-SQL Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zefeng</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiangyu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Binyuan</namePart>
<namePart type="family">Hui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bowen</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Binhua</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weijie</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luo</namePart>
<namePart type="family">Si</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongbin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we propose a novel SQL guided pre-training framework STAR for context-dependent text-to-SQL parsing, which leverages contextual information to enrich natural language (NL) utterance and table schema representations for text-to-SQL conversations. Concretely, we propose two novel pre-training objectives which respectively explore the context-dependent interactions of NL utterances and SQL queries within each text-to-SQL conversation: (i) schema state tracking (SST) objective that tracks and explores the schema states of context-dependent SQL queries in the form of schema-states by predicting and updating the value of each schema slot during interaction; (ii) utterance dependency tracking (UDT) objective that employs weighted contrastive learning to pull together two semantically similar NL utterances and push away the representations of semantically dissimilar NL utterances within each conversation. In addition, we construct a high-quality large-scale context-dependent text-to-SQL conversation corpus to pre-train STAR. Extensive experiments show that STAR achieves new state-of-the-art performance on two downstream benchmarks (SParC and CoSQL), significantly outperforming previous pre-training methods and ranking first on the leaderboard. We believe the release of the constructed corpus, codebase and pre-trained STAR checkpoints would push forward the research in this area.</abstract>
<identifier type="citekey">cai-etal-2022-star</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.89</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.89</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>1235</start>
<end>1247</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T STAR: SQL Guided Pre-Training for Context-dependent Text-to-SQL Parsing
%A Cai, Zefeng
%A Li, Xiangyu
%A Hui, Binyuan
%A Yang, Min
%A Li, Bowen
%A Li, Binhua
%A Cao, Zheng
%A Li, Weijie
%A Huang, Fei
%A Si, Luo
%A Li, Yongbin
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F cai-etal-2022-star
%X In this paper, we propose a novel SQL guided pre-training framework STAR for context-dependent text-to-SQL parsing, which leverages contextual information to enrich natural language (NL) utterance and table schema representations for text-to-SQL conversations. Concretely, we propose two novel pre-training objectives which respectively explore the context-dependent interactions of NL utterances and SQL queries within each text-to-SQL conversation: (i) schema state tracking (SST) objective that tracks and explores the schema states of context-dependent SQL queries in the form of schema-states by predicting and updating the value of each schema slot during interaction; (ii) utterance dependency tracking (UDT) objective that employs weighted contrastive learning to pull together two semantically similar NL utterances and push away the representations of semantically dissimilar NL utterances within each conversation. In addition, we construct a high-quality large-scale context-dependent text-to-SQL conversation corpus to pre-train STAR. Extensive experiments show that STAR achieves new state-of-the-art performance on two downstream benchmarks (SParC and CoSQL), significantly outperforming previous pre-training methods and ranking first on the leaderboard. We believe the release of the constructed corpus, codebase and pre-trained STAR checkpoints would push forward the research in this area.
%R 10.18653/v1/2022.findings-emnlp.89
%U https://aclanthology.org/2022.findings-emnlp.89
%U https://doi.org/10.18653/v1/2022.findings-emnlp.89
%P 1235-1247
Markdown (Informal)
[STAR: SQL Guided Pre-Training for Context-dependent Text-to-SQL Parsing](https://aclanthology.org/2022.findings-emnlp.89) (Cai et al., Findings 2022)
ACL
- Zefeng Cai, Xiangyu Li, Binyuan Hui, Min Yang, Bowen Li, Binhua Li, Zheng Cao, Weijie Li, Fei Huang, Luo Si, and Yongbin Li. 2022. STAR: SQL Guided Pre-Training for Context-dependent Text-to-SQL Parsing. In Findings of the Association for Computational Linguistics: EMNLP 2022, pages 1235–1247, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.