@inproceedings{mekala-etal-2024-toolverifier,
title = "{TOOLVERIFIER}: Generalization to New Tools via Self-Verification",
author = "Mekala, Dheeraj and
Weston, Jason E and
Lanchantin, Jack and
Raileanu, Roberta and
Lomeli, Maria and
Shang, Jingbo and
Dwivedi-Yu, Jane",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.289/",
doi = "10.18653/v1/2024.findings-emnlp.289",
pages = "5026--5041",
abstract = "Teaching language models to use tools is an important milestone towards building general assistants, but remains an open problem. While there has been significant progress on learning to use specific tools via fine-tuning, language models still struggle with learning how to robustly use new tools from only a few demonstrations. In this work we introduce a self-verification method which distinguishes between close candidates by self-asking contrastive questions during (1) tool selection; and parameter generation. We construct synthetic, high-quality, self-generated data for this goal using Llama-2 70B, which we intend to release publicly. Extensive experiments on 4 tasks from the ToolBench benchmark, consisting of 17 unseen tools, demonstrate an average improvement of 22{\%} over few-shot baselines, even in scenarios where the distinctions between candidate tools are finely nuanced."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mekala-etal-2024-toolverifier">
<titleInfo>
<title>TOOLVERIFIER: Generalization to New Tools via Self-Verification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dheeraj</namePart>
<namePart type="family">Mekala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Weston</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Lanchantin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberta</namePart>
<namePart type="family">Raileanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Lomeli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingbo</namePart>
<namePart type="family">Shang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jane</namePart>
<namePart type="family">Dwivedi-Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Teaching language models to use tools is an important milestone towards building general assistants, but remains an open problem. While there has been significant progress on learning to use specific tools via fine-tuning, language models still struggle with learning how to robustly use new tools from only a few demonstrations. In this work, we introduce a self-verification method which distinguishes between close candidates by self-asking contrastive questions during (1) tool selection; and (2) parameter generation. We construct synthetic, high-quality, self-generated data for this goal using Llama-2 70B, which we intend to release publicly. Extensive experiments on 4 tasks from the ToolBench benchmark, consisting of 17 unseen tools, demonstrate an average improvement of 22% over few-shot baselines, even in scenarios where the distinctions between candidate tools are finely nuanced.</abstract>
<identifier type="citekey">mekala-etal-2024-toolverifier</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.289</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.289/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>5026</start>
<end>5041</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TOOLVERIFIER: Generalization to New Tools via Self-Verification
%A Mekala, Dheeraj
%A Weston, Jason E.
%A Lanchantin, Jack
%A Raileanu, Roberta
%A Lomeli, Maria
%A Shang, Jingbo
%A Dwivedi-Yu, Jane
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F mekala-etal-2024-toolverifier
%X Teaching language models to use tools is an important milestone towards building general assistants, but remains an open problem. While there has been significant progress on learning to use specific tools via fine-tuning, language models still struggle with learning how to robustly use new tools from only a few demonstrations. In this work, we introduce a self-verification method which distinguishes between close candidates by self-asking contrastive questions during (1) tool selection; and (2) parameter generation. We construct synthetic, high-quality, self-generated data for this goal using Llama-2 70B, which we intend to release publicly. Extensive experiments on 4 tasks from the ToolBench benchmark, consisting of 17 unseen tools, demonstrate an average improvement of 22% over few-shot baselines, even in scenarios where the distinctions between candidate tools are finely nuanced.
%R 10.18653/v1/2024.findings-emnlp.289
%U https://aclanthology.org/2024.findings-emnlp.289/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.289
%P 5026-5041
Markdown (Informal)
[TOOLVERIFIER: Generalization to New Tools via Self-Verification](https://aclanthology.org/2024.findings-emnlp.289/) (Mekala et al., Findings 2024)
ACL
Dheeraj Mekala, Jason E Weston, Jack Lanchantin, Roberta Raileanu, Maria Lomeli, Jingbo Shang, and Jane Dwivedi-Yu. 2024. TOOLVERIFIER: Generalization to New Tools via Self-Verification. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 5026–5041, Miami, Florida, USA. Association for Computational Linguistics.