@inproceedings{sun-etal-2024-tools,
title = "Tools Fail: Detecting Silent Errors in Faulty Tools",
author = "Sun, Jimin and
Min, So Yeon and
Chang, Yingshan and
Bisk, Yonatan",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.790/",
doi = "10.18653/v1/2024.emnlp-main.790",
pages = "14272--14289",
abstract = "Tools have become a mainstay of LLMs, allowing them to retrieve knowledge not in their weights, to perform tasks on the web, and even to control robots. However, most ontologies and surveys of tool-use have assumed the core challenge for LLMs is choosing the tool. Instead, we introduce a framework for tools more broadly which guides us to explore a model's ability to detect {\textquotedblleft}silent{\textquotedblright} tool errors, and reflect on how to plan. This more directly aligns with the increasingly popular use of models as tools. We provide an initial approach to failure recovery with promising results both on a controlled calculator setting and embodied agent planning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sun-etal-2024-tools">
<titleInfo>
<title>Tools Fail: Detecting Silent Errors in Faulty Tools</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="given">Yeon</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingshan</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Bisk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Tools have become a mainstay of LLMs, allowing them to retrieve knowledge not in their weights, to perform tasks on the web, and even to control robots. However, most ontologies and surveys of tool-use have assumed the core challenge for LLMs is choosing the tool. Instead, we introduce a framework for tools more broadly which guides us to explore a model’s ability to detect “silent” tool errors, and reflect on how to plan. This more directly aligns with the increasingly popular use of models as tools. We provide an initial approach to failure recovery with promising results both on a controlled calculator setting and embodied agent planning.</abstract>
<identifier type="citekey">sun-etal-2024-tools</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.790</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.790/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14272</start>
<end>14289</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tools Fail: Detecting Silent Errors in Faulty Tools
%A Sun, Jimin
%A Min, So Yeon
%A Chang, Yingshan
%A Bisk, Yonatan
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F sun-etal-2024-tools
%X Tools have become a mainstay of LLMs, allowing them to retrieve knowledge not in their weights, to perform tasks on the web, and even to control robots. However, most ontologies and surveys of tool-use have assumed the core challenge for LLMs is choosing the tool. Instead, we introduce a framework for tools more broadly which guides us to explore a model’s ability to detect “silent” tool errors, and reflect on how to plan. This more directly aligns with the increasingly popular use of models as tools. We provide an initial approach to failure recovery with promising results both on a controlled calculator setting and embodied agent planning.
%R 10.18653/v1/2024.emnlp-main.790
%U https://aclanthology.org/2024.emnlp-main.790/
%U https://doi.org/10.18653/v1/2024.emnlp-main.790
%P 14272-14289
Markdown (Informal)
[Tools Fail: Detecting Silent Errors in Faulty Tools](https://aclanthology.org/2024.emnlp-main.790/) (Sun et al., EMNLP 2024)
ACL
- Jimin Sun, So Yeon Min, Yingshan Chang, and Yonatan Bisk. 2024. Tools Fail: Detecting Silent Errors in Faulty Tools. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 14272–14289, Miami, Florida, USA. Association for Computational Linguistics.