BibTeX
@inproceedings{karmaker-santu-feng-2023-teler,
title = "{TEL}e{R}: A General Taxonomy of {LLM} Prompts for Benchmarking Complex Tasks",
author = "Karmaker Santu, Shubhra Kanti and
Feng, Dongji",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.946",
doi = "10.18653/v1/2023.findings-emnlp.946",
pages = "14197--14203",
abstract = "While LLMs have shown great success in understanding and generating text in traditional conversational settings, their potential for performing ill-defined complex tasks is largely under-studied and yet to be benchmarked. However, conducting such benchmarking studies is challenging because of the large variations in LLMs{'} performance when different prompt types/styles are used and different degrees of detail are provided in the prompts. To address this issue, this paper proposes a general taxonomy that can be used to design prompts with specific properties in order to perform a wide range of complex tasks. This taxonomy will allow future benchmarking studies to report the specific categories of prompts used as part of the study, enabling meaningful comparisons across different studies. Also, by establishing a common standard through this taxonomy, researchers will be able to draw more accurate conclusions about LLMs{'} performance on a specific complex task.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="karmaker-santu-feng-2023-teler">
    <titleInfo>
      <title>TELeR: A General Taxonomy of LLM Prompts for Benchmarking Complex Tasks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Shubhra</namePart>
      <namePart type="given">Kanti</namePart>
      <namePart type="family">Karmaker Santu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dongji</namePart>
      <namePart type="family">Feng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>While LLMs have shown great success in understanding and generating text in traditional conversational settings, their potential for performing ill-defined complex tasks is largely under-studied and yet to be benchmarked. However, conducting such benchmarking studies is challenging because of the large variations in LLMs’ performance when different prompt types/styles are used and different degrees of detail are provided in the prompts. To address this issue, this paper proposes a general taxonomy that can be used to design prompts with specific properties in order to perform a wide range of complex tasks. This taxonomy will allow future benchmarking studies to report the specific categories of prompts used as part of the study, enabling meaningful comparisons across different studies. Also, by establishing a common standard through this taxonomy, researchers will be able to draw more accurate conclusions about LLMs’ performance on a specific complex task.</abstract>
    <identifier type="citekey">karmaker-santu-feng-2023-teler</identifier>
    <identifier type="doi">10.18653/v1/2023.findings-emnlp.946</identifier>
    <location>
      <url>https://aclanthology.org/2023.findings-emnlp.946</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>14197</start>
        <end>14203</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T TELeR: A General Taxonomy of LLM Prompts for Benchmarking Complex Tasks
%A Karmaker Santu, Shubhra Kanti
%A Feng, Dongji
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F karmaker-santu-feng-2023-teler
%X While LLMs have shown great success in understanding and generating text in traditional conversational settings, their potential for performing ill-defined complex tasks is largely under-studied and yet to be benchmarked. However, conducting such benchmarking studies is challenging because of the large variations in LLMs’ performance when different prompt types/styles are used and different degrees of detail are provided in the prompts. To address this issue, this paper proposes a general taxonomy that can be used to design prompts with specific properties in order to perform a wide range of complex tasks. This taxonomy will allow future benchmarking studies to report the specific categories of prompts used as part of the study, enabling meaningful comparisons across different studies. Also, by establishing a common standard through this taxonomy, researchers will be able to draw more accurate conclusions about LLMs’ performance on a specific complex task.
%R 10.18653/v1/2023.findings-emnlp.946
%U https://aclanthology.org/2023.findings-emnlp.946
%U https://doi.org/10.18653/v1/2023.findings-emnlp.946
%P 14197-14203

Markdown (Informal)
[TELeR: A General Taxonomy of LLM Prompts for Benchmarking Complex Tasks](https://aclanthology.org/2023.findings-emnlp.946) (Karmaker Santu & Feng, Findings 2023)

ACL
Shubhra Kanti Karmaker Santu and Dongji Feng. 2023. TELeR: A General Taxonomy of LLM Prompts for Benchmarking Complex Tasks. In Findings of the Association for Computational Linguistics: EMNLP 2023, pages 14197–14203, Singapore. Association for Computational Linguistics.