% Conference paper record (ACL Anthology export): NAACL 2024, Industry Track.
% Text outside an @entry is ignored by BibTeX, so these lines act as comments.
@inproceedings{akella-etal-2024-automatic,
  title     = {An Automatic Prompt Generation System for Tabular Data Tasks},
  author    = {Akella, Ashlesha and Manatkar, Abhijit and Chavda, Brijkumar and Patel, Hima},
  editor    = {Yang, Yi and Davani, Aida and Sil, Avi and Kumar, Anoop},
  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.naacl-industry.16},
  doi       = {10.18653/v1/2024.naacl-industry.16},
  pages     = {191--200},
  abstract  = {Efficient processing of tabular data is important in various industries, especially when working with datasets containing a large number of columns. Large language models (LLMs) have demonstrated their ability on several tasks through carefully crafted prompts. However, creating effective prompts for tabular datasets is challenging due to the structured nature of the data and the need to manage numerous columns. This paper presents an innovative auto-prompt generation system suitable for multiple LLMs, with minimal training. It proposes two novel methods; 1) A Reinforcement Learning-based algorithm for identifying and sequencing task-relevant columns 2) cell-level similarity-based approach for enhancing few-shot example selection. Our approach has been extensively tested across 66 datasets, demonstrating improved performance in three downstream tasks: data imputation, error detection, and entity matching using two distinct LLMs; Google/flant-t5xxl and Mixtral 8x7B.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single conference paper; the ID matches the citekey identifier below. -->
  <mods ID="akella-etal-2024-automatic">
    <titleInfo>
      <title>An Automatic Prompt Generation System for Tabular Data Tasks</title>
    </titleInfo>

    <!-- Paper authors, one <name> element per person. -->
    <name type="personal">
      <namePart type="given">Ashlesha</namePart>
      <namePart type="family">Akella</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abhijit</namePart>
      <namePart type="family">Manatkar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brijkumar</namePart>
      <namePart type="family">Chavda</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hima</namePart>
      <namePart type="family">Patel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yi</namePart>
        <namePart type="family">Yang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aida</namePart>
        <namePart type="family">Davani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Avi</namePart>
        <namePart type="family">Sil</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anoop</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Efficient processing of tabular data is important in various industries, especially when working with datasets containing a large number of columns. Large language models (LLMs) have demonstrated their ability on several tasks through carefully crafted prompts. However, creating effective prompts for tabular datasets is challenging due to the structured nature of the data and the need to manage numerous columns. This paper presents an innovative auto-prompt generation system suitable for multiple LLMs, with minimal training. It proposes two novel methods; 1) A Reinforcement Learning-based algorithm for identifying and sequencing task-relevant columns 2) cell-level similarity-based approach for enhancing few-shot example selection. Our approach has been extensively tested across 66 datasets, demonstrating improved performance in three downstream tasks: data imputation, error detection, and entity matching using two distinct LLMs; Google/flant-t5xxl and Mixtral 8x7B.</abstract>

    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">akella-etal-2024-automatic</identifier>
    <identifier type="doi">10.18653/v1/2024.naacl-industry.16</identifier>
    <location>
      <url>https://aclanthology.org/2024.naacl-industry.16</url>
    </location>

    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>191</start>
        <end>200</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T An Automatic Prompt Generation System for Tabular Data Tasks
%A Akella, Ashlesha
%A Manatkar, Abhijit
%A Chavda, Brijkumar
%A Patel, Hima
%Y Yang, Yi
%Y Davani, Aida
%Y Sil, Avi
%Y Kumar, Anoop
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F akella-etal-2024-automatic
%X Efficient processing of tabular data is important in various industries, especially when working with datasets containing a large number of columns. Large language models (LLMs) have demonstrated their ability on several tasks through carefully crafted prompts. However, creating effective prompts for tabular datasets is challenging due to the structured nature of the data and the need to manage numerous columns. This paper presents an innovative auto-prompt generation system suitable for multiple LLMs, with minimal training. It proposes two novel methods; 1) A Reinforcement Learning-based algorithm for identifying and sequencing task-relevant columns 2) cell-level similarity-based approach for enhancing few-shot example selection. Our approach has been extensively tested across 66 datasets, demonstrating improved performance in three downstream tasks: data imputation, error detection, and entity matching using two distinct LLMs; Google/flant-t5xxl and Mixtral 8x7B.
%R 10.18653/v1/2024.naacl-industry.16
%U https://aclanthology.org/2024.naacl-industry.16
%U https://doi.org/10.18653/v1/2024.naacl-industry.16
%P 191-200
Markdown (Informal)
[An Automatic Prompt Generation System for Tabular Data Tasks](https://aclanthology.org/2024.naacl-industry.16) (Akella et al., NAACL 2024)
ACL
- Ashlesha Akella, Abhijit Manatkar, Brijkumar Chavda, and Hima Patel. 2024. An Automatic Prompt Generation System for Tabular Data Tasks. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track), pages 191–200, Mexico City, Mexico. Association for Computational Linguistics.