@inproceedings{nooralahzadeh-etal-2024-statbot,
title = "{S}tat{B}ot.{S}wiss: Bilingual Open Data Exploration in Natural Language",
author = {Nooralahzadeh, Farhad and
Zhang, Yi and
Smith, Ellery and
Maennel, Sabine and
Matthey-Doret, Cyril and
De Fondeville, Rapha{\"e}l and
Stockinger, Kurt},
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.326",
pages = "5486--5507",
abstract = "The potential for improvements brought by Large Language Models (LLMs) in Text-to-SQL systems is mostly assessed on monolingual English datasets. However, LLMs{'} performance for other languages remains vastly unexplored. In this work, we release the StatBot.Swiss dataset, the first bilingual benchmark for evaluating Text-to-SQL systems based on real-world applications. The StatBot.Swiss dataset contains 455 natural language/SQL-pairs over 35 big databases with varying level of complexity for both English and German.We evaluate the performance of state-of-the-art LLMs such as GPT-3.5-Turbo and mixtral-8x7b-instruct for the Text-to-SQL translation task using an in-context learning approach. Our experimental analysis illustrates that current LLMs struggle to generalize well in generating SQL queries on our novel bilingual dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nooralahzadeh-etal-2024-statbot">
<titleInfo>
<title>StatBot.Swiss: Bilingual Open Data Exploration in Natural Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Farhad</namePart>
<namePart type="family">Nooralahzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ellery</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabine</namePart>
<namePart type="family">Maennel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cyril</namePart>
<namePart type="family">Matthey-Doret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raphaël</namePart>
<namePart type="family">De Fondeville</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kurt</namePart>
<namePart type="family">Stockinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand and virtual meeting</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The potential for improvements brought by Large Language Models (LLMs) in Text-to-SQL systems is mostly assessed on monolingual English datasets. However, LLMs’ performance for other languages remains vastly unexplored. In this work, we release the StatBot.Swiss dataset, the first bilingual benchmark for evaluating Text-to-SQL systems based on real-world applications. The StatBot.Swiss dataset contains 455 natural language/SQL-pairs over 35 big databases with varying level of complexity for both English and German.We evaluate the performance of state-of-the-art LLMs such as GPT-3.5-Turbo and mixtral-8x7b-instruct for the Text-to-SQL translation task using an in-context learning approach. Our experimental analysis illustrates that current LLMs struggle to generalize well in generating SQL queries on our novel bilingual dataset.</abstract>
<identifier type="citekey">nooralahzadeh-etal-2024-statbot</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.326</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>5486</start>
<end>5507</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T StatBot.Swiss: Bilingual Open Data Exploration in Natural Language
%A Nooralahzadeh, Farhad
%A Zhang, Yi
%A Smith, Ellery
%A Maennel, Sabine
%A Matthey-Doret, Cyril
%A De Fondeville, Raphaël
%A Stockinger, Kurt
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand and virtual meeting
%F nooralahzadeh-etal-2024-statbot
%X The potential for improvements brought by Large Language Models (LLMs) in Text-to-SQL systems is mostly assessed on monolingual English datasets. However, LLMs’ performance for other languages remains vastly unexplored. In this work, we release the StatBot.Swiss dataset, the first bilingual benchmark for evaluating Text-to-SQL systems based on real-world applications. The StatBot.Swiss dataset contains 455 natural language/SQL-pairs over 35 big databases with varying level of complexity for both English and German.We evaluate the performance of state-of-the-art LLMs such as GPT-3.5-Turbo and mixtral-8x7b-instruct for the Text-to-SQL translation task using an in-context learning approach. Our experimental analysis illustrates that current LLMs struggle to generalize well in generating SQL queries on our novel bilingual dataset.
%U https://aclanthology.org/2024.findings-acl.326
%P 5486-5507
Markdown (Informal)
[StatBot.Swiss: Bilingual Open Data Exploration in Natural Language](https://aclanthology.org/2024.findings-acl.326) (Nooralahzadeh et al., Findings 2024)
ACL
- Farhad Nooralahzadeh, Yi Zhang, Ellery Smith, Sabine Maennel, Cyril Matthey-Doret, Raphaël De Fondeville, and Kurt Stockinger. 2024. StatBot.Swiss: Bilingual Open Data Exploration in Natural Language. In Findings of the Association for Computational Linguistics ACL 2024, pages 5486–5507, Bangkok, Thailand and virtual meeting. Association for Computational Linguistics.