@inproceedings{erdogan-etal-2024-tinyagent,
title = "{T}iny{A}gent: Function Calling at the Edge",
author = "Erdogan, Lutfi Eren and
Lee, Nicholas and
Jha, Siddharth and
Kim, Sehoon and
Tabrizi, Ryan and
Moon, Suhong and
Hooper, Coleman Richard Charles and
Anumanchipalli, Gopala and
Keutzer, Kurt and
Gholami, Amir",
editor = "Hernandez Farias, Delia Irazu and
Hope, Tom and
Li, Manling",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-demo.9",
pages = "80--88",
abstract = "Recent large language models (LLMs) have enabled the development of advanced agentic systems that can integrate various tools and APIs to fulfill user queries through function calling. However, the deployment of these LLMs on the edge has not been explored since they typically require cloud-based infrastructure due to their substantial model size and computational demands. To this end, we present TinyAgent, an end-to-end framework for training and deploying task-specific small language model agents capable of function calling for driving agentic systems at the edge. We first show how to enable accurate function calling for open-source models via the LLMCompiler framework. We then systematically curate a high-quality dataset for function calling, which we use to fine-tune two small language models, TinyAgent-1.1B and 7B. For efficient inference, we introduce a novel tool retrieval method to reduce the input prompt length and utilize quantization to further accelerate the inference speed. As a driving application, we demonstrate a local Siri-like system for Apple{'}s MacBook that can execute user commands through text or voice input. Our results show that our models can achieve, and even surpass, the function-calling capabilities of larger models like GPT-4-Turbo, while being fully deployed at the edge. We open-source our [dataset, models, and installable package](https://github.com/SqueezeAILab/TinyAgent) and provide a [demo video](https://www.youtube.com/watch?v=0GvaGL9IDpQ) for our MacBook assistant agent.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="erdogan-etal-2024-tinyagent">
<titleInfo>
<title>TinyAgent: Function Calling at the Edge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lutfi</namePart>
<namePart type="given">Eren</namePart>
<namePart type="family">Erdogan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siddharth</namePart>
<namePart type="family">Jha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sehoon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Tabrizi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suhong</namePart>
<namePart type="family">Moon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Coleman</namePart>
<namePart type="given">Richard</namePart>
<namePart type="given">Charles</namePart>
<namePart type="family">Hooper</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gopala</namePart>
<namePart type="family">Anumanchipalli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kurt</namePart>
<namePart type="family">Keutzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Gholami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Delia</namePart>
<namePart type="given">Irazu</namePart>
<namePart type="family">Hernandez Farias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent large language models (LLMs) have enabled the development of advanced agentic systems that can integrate various tools and APIs to fulfill user queries through function calling. However, the deployment of these LLMs on the edge has not been explored since they typically require cloud-based infrastructure due to their substantial model size and computational demands. To this end, we present TinyAgent, an end-to-end framework for training and deploying task-specific small language model agents capable of function calling for driving agentic systems at the edge. We first show how to enable accurate function calling for open-source models via the LLMCompiler framework. We then systematically curate a high-quality dataset for function calling, which we use to fine-tune two small language models, TinyAgent-1.1B and 7B. For efficient inference, we introduce a novel tool retrieval method to reduce the input prompt length and utilize quantization to further accelerate the inference speed. As a driving application, we demonstrate a local Siri-like system for Apple’s MacBook that can execute user commands through text or voice input. Our results show that our models can achieve, and even surpass, the function-calling capabilities of larger models like GPT-4-Turbo, while being fully deployed at the edge. We open-source our [dataset, models, and installable package](https://github.com/SqueezeAILab/TinyAgent) and provide a [demo video](https://www.youtube.com/watch?v=0GvaGL9IDpQ) for our MacBook assistant agent.</abstract>
<identifier type="citekey">erdogan-etal-2024-tinyagent</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-demo.9</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>80</start>
<end>88</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TinyAgent: Function Calling at the Edge
%A Erdogan, Lutfi Eren
%A Lee, Nicholas
%A Jha, Siddharth
%A Kim, Sehoon
%A Tabrizi, Ryan
%A Moon, Suhong
%A Hooper, Coleman Richard Charles
%A Anumanchipalli, Gopala
%A Keutzer, Kurt
%A Gholami, Amir
%Y Hernandez Farias, Delia Irazu
%Y Hope, Tom
%Y Li, Manling
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F erdogan-etal-2024-tinyagent
%X Recent large language models (LLMs) have enabled the development of advanced agentic systems that can integrate various tools and APIs to fulfill user queries through function calling. However, the deployment of these LLMs on the edge has not been explored since they typically require cloud-based infrastructure due to their substantial model size and computational demands. To this end, we present TinyAgent, an end-to-end framework for training and deploying task-specific small language model agents capable of function calling for driving agentic systems at the edge. We first show how to enable accurate function calling for open-source models via the LLMCompiler framework. We then systematically curate a high-quality dataset for function calling, which we use to fine-tune two small language models, TinyAgent-1.1B and 7B. For efficient inference, we introduce a novel tool retrieval method to reduce the input prompt length and utilize quantization to further accelerate the inference speed. As a driving application, we demonstrate a local Siri-like system for Apple’s MacBook that can execute user commands through text or voice input. Our results show that our models can achieve, and even surpass, the function-calling capabilities of larger models like GPT-4-Turbo, while being fully deployed at the edge. We open-source our [dataset, models, and installable package](https://github.com/SqueezeAILab/TinyAgent) and provide a [demo video](https://www.youtube.com/watch?v=0GvaGL9IDpQ) for our MacBook assistant agent.
%U https://aclanthology.org/2024.emnlp-demo.9
%P 80-88
Markdown (Informal)
[TinyAgent: Function Calling at the Edge](https://aclanthology.org/2024.emnlp-demo.9) (Erdogan et al., EMNLP 2024)
ACL
- Lutfi Eren Erdogan, Nicholas Lee, Siddharth Jha, Sehoon Kim, Ryan Tabrizi, Suhong Moon, Coleman Richard Charles Hooper, Gopala Anumanchipalli, Kurt Keutzer, and Amir Gholami. 2024. TinyAgent: Function Calling at the Edge. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 80–88, Miami, Florida, USA. Association for Computational Linguistics.