@inproceedings{vaibhav-srivastava-2022-makadi,
title = "Makadi: A Large-Scale Human-Labeled Dataset for {H}indi Semantic Parsing",
author = "Vaibhav, Shashwat and
Srivastava, Nisheeth",
editor = "Jha, Girish Nath and
L., Sobha and
Bali, Kalika and
Ojha, Atul Kr.",
booktitle = "Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.wildre-1.12",
pages = "68--73",
abstract = "Parsing natural language queries into formal database calls is a very well-studied problem. Because of the rich diversity of semantic markers across the world{'}s languages, progress in solving this problem is irreducibly language-dependent. This has created an asymmetry in progress in NLIDB solutions, with most state-of-the-art efforts focused on the resource-rich English language, with limited progress seen for low resource languages. In this short paper, we present Makadi, a large-scale, complex, cross-lingual, cross-domain semantic parsing and text-to-SQL dataset for semantic parsing in the Hindi language. Produced by translating the recently introduced English language Spider NLIDB dataset, it consists of 9693 questions and SQL queries on 166 databases with multiple tables which cover multiple domains. This is the first large-scale dataset in the Hindi language for semantic parsing and related language understanding tasks. Our dataset is publicly available at: Link removed to preserve anonymization during peer review.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vaibhav-srivastava-2022-makadi">
<titleInfo>
<title>Makadi: A Large-Scale Human-Labeled Dataset for Hindi Semantic Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shashwat</namePart>
<namePart type="family">Vaibhav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nisheeth</namePart>
<namePart type="family">Srivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Girish</namePart>
<namePart type="given">Nath</namePart>
<namePart type="family">Jha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">L.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Parsing natural language queries into formal database calls is a very well-studied problem. Because of the rich diversity of semantic markers across the world’s languages, progress in solving this problem is irreducibly language-dependent. This has created an asymmetry in progress in NLIDB solutions, with most state-of-the-art efforts focused on the resource-rich English language, with limited progress seen for low resource languages. In this short paper, we present Makadi, a large-scale, complex, cross-lingual, cross-domain semantic parsing and text-to-SQL dataset for semantic parsing in the Hindi language. Produced by translating the recently introduced English language Spider NLIDB dataset, it consists of 9693 questions and SQL queries on 166 databases with multiple tables which cover multiple domains. This is the first large-scale dataset in the Hindi language for semantic parsing and related language understanding tasks. Our dataset is publicly available at: Link removed to preserve anonymization during peer review.</abstract>
<identifier type="citekey">vaibhav-srivastava-2022-makadi</identifier>
<location>
<url>https://aclanthology.org/2022.wildre-1.12</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>68</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Makadi: A Large-Scale Human-Labeled Dataset for Hindi Semantic Parsing
%A Vaibhav, Shashwat
%A Srivastava, Nisheeth
%Y Jha, Girish Nath
%Y L., Sobha
%Y Bali, Kalika
%Y Ojha, Atul Kr.
%S Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F vaibhav-srivastava-2022-makadi
%X Parsing natural language queries into formal database calls is a very well-studied problem. Because of the rich diversity of semantic markers across the world’s languages, progress in solving this problem is irreducibly language-dependent. This has created an asymmetry in progress in NLIDB solutions, with most state-of-the-art efforts focused on the resource-rich English language, with limited progress seen for low resource languages. In this short paper, we present Makadi, a large-scale, complex, cross-lingual, cross-domain semantic parsing and text-to-SQL dataset for semantic parsing in the Hindi language. Produced by translating the recently introduced English language Spider NLIDB dataset, it consists of 9693 questions and SQL queries on 166 databases with multiple tables which cover multiple domains. This is the first large-scale dataset in the Hindi language for semantic parsing and related language understanding tasks. Our dataset is publicly available at: Link removed to preserve anonymization during peer review.
%U https://aclanthology.org/2022.wildre-1.12
%P 68-73
Markdown (Informal)
[Makadi: A Large-Scale Human-Labeled Dataset for Hindi Semantic Parsing](https://aclanthology.org/2022.wildre-1.12) (Vaibhav & Srivastava, WILDRE 2022)
ACL