@inproceedings{howell-etal-2023-distilled,
title = "The economic trade-offs of large language models: A case study",
author = "Howell, Kristen and
Christian, Gwen and
Fomitchov, Pavel and
Kehat, Gitit and
Marzulla, Julianne and
Rolston, Leanne and
Tredup, Jadin and
Zimmerman, Ilana and
Selfridge, Ethan and
Bradley, Joseph",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.24",
doi = "10.18653/v1/2023.acl-industry.24",
pages = "248--267",
abstract = "Contacting customer service via chat is a common practice. Because employing customer service agents is expensive, many companies are turning to NLP that assists human agents by auto-generating responses that can be used directly or with modifications. With their ability to handle large context windows, Large Language Models (LLMs) are a natural fit for this use case. However, their efficacy must be balanced with the cost of training and serving them. This paper assesses the practical cost and impact of LLMs for the enterprise as a function of the usefulness of the responses that they generate. We present a cost framework for evaluating an NLP model{'}s utility for this use case and apply it to a single brand as a case study in the context of an existing agent assistance product. We compare three strategies for specializing an LLM {---} prompt engineering, fine-tuning, and knowledge distillation {---} using feedback from the brand{'}s customer service agents. We find that the usability of a model{'}s responses can make up for a large difference in inference cost for our case study brand, and we extrapolate our findings to the broader enterprise space.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="howell-etal-2023-distilled">
    <titleInfo>
      <title>The economic trade-offs of large language models: A case study</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Kristen</namePart>
      <namePart type="family">Howell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gwen</namePart>
      <namePart type="family">Christian</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavel</namePart>
      <namePart type="family">Fomitchov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gitit</namePart>
      <namePart type="family">Kehat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Julianne</namePart>
      <namePart type="family">Marzulla</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leanne</namePart>
      <namePart type="family">Rolston</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jadin</namePart>
      <namePart type="family">Tredup</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ilana</namePart>
      <namePart type="family">Zimmerman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ethan</namePart>
      <namePart type="family">Selfridge</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joseph</namePart>
      <namePart type="family">Bradley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sunayana</namePart>
        <namePart type="family">Sitaram</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Beata</namePart>
        <namePart type="family">Beigman Klebanov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jason</namePart>
        <namePart type="given">D</namePart>
        <namePart type="family">Williams</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Contacting customer service via chat is a common practice. Because employing customer service agents is expensive, many companies are turning to NLP that assists human agents by auto-generating responses that can be used directly or with modifications. With their ability to handle large context windows, Large Language Models (LLMs) are a natural fit for this use case. However, their efficacy must be balanced with the cost of training and serving them. This paper assesses the practical cost and impact of LLMs for the enterprise as a function of the usefulness of the responses that they generate. We present a cost framework for evaluating an NLP model’s utility for this use case and apply it to a single brand as a case study in the context of an existing agent assistance product. We compare three strategies for specializing an LLM — prompt engineering, fine-tuning, and knowledge distillation — using feedback from the brand’s customer service agents. We find that the usability of a model’s responses can make up for a large difference in inference cost for our case study brand, and we extrapolate our findings to the broader enterprise space.</abstract>
    <identifier type="citekey">howell-etal-2023-distilled</identifier>
    <identifier type="doi">10.18653/v1/2023.acl-industry.24</identifier>
    <location>
      <url>https://aclanthology.org/2023.acl-industry.24</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>248</start>
        <end>267</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The economic trade-offs of large language models: A case study
%A Howell, Kristen
%A Christian, Gwen
%A Fomitchov, Pavel
%A Kehat, Gitit
%A Marzulla, Julianne
%A Rolston, Leanne
%A Tredup, Jadin
%A Zimmerman, Ilana
%A Selfridge, Ethan
%A Bradley, Joseph
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F howell-etal-2023-distilled
%X Contacting customer service via chat is a common practice. Because employing customer service agents is expensive, many companies are turning to NLP that assists human agents by auto-generating responses that can be used directly or with modifications. With their ability to handle large context windows, Large Language Models (LLMs) are a natural fit for this use case. However, their efficacy must be balanced with the cost of training and serving them. This paper assesses the practical cost and impact of LLMs for the enterprise as a function of the usefulness of the responses that they generate. We present a cost framework for evaluating an NLP model’s utility for this use case and apply it to a single brand as a case study in the context of an existing agent assistance product. We compare three strategies for specializing an LLM — prompt engineering, fine-tuning, and knowledge distillation — using feedback from the brand’s customer service agents. We find that the usability of a model’s responses can make up for a large difference in inference cost for our case study brand, and we extrapolate our findings to the broader enterprise space.
%R 10.18653/v1/2023.acl-industry.24
%U https://aclanthology.org/2023.acl-industry.24
%U https://doi.org/10.18653/v1/2023.acl-industry.24
%P 248-267
Markdown (Informal)
[The economic trade-offs of large language models: A case study](https://aclanthology.org/2023.acl-industry.24) (Howell et al., ACL 2023)
ACL
Kristen Howell, Gwen Christian, Pavel Fomitchov, Gitit Kehat, Julianne Marzulla, Leanne Rolston, Jadin Tredup, Ilana Zimmerman, Ethan Selfridge, and Joseph Bradley. 2023. The economic trade-offs of large language models: A case study. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track), pages 248–267, Toronto, Canada. Association for Computational Linguistics.
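
Note on the abstract's central claim: the paper argues that a model whose responses agents can actually use may be cheaper per useful response even when its raw inference cost is much higher. The short Python sketch below only illustrates that arithmetic; the function name, the cost-per-usable-response formula, and every number are assumptions of this note, not the cost framework defined in the paper.

# Illustrative only: NOT the cost framework from Howell et al. (2023).
# The formula and all numbers below are assumptions chosen to show how a
# higher usability rate can offset a higher per-call inference cost.

def cost_per_usable_response(inference_cost_per_call: float,
                             usability_rate: float) -> float:
    """Serving cost divided by the fraction of responses agents actually use."""
    if not 0.0 < usability_rate <= 1.0:
        raise ValueError("usability_rate must be in (0, 1]")
    return inference_cost_per_call / usability_rate

# Hypothetical comparison: an expensive model with highly usable responses
# versus a cheaper model whose responses are rarely usable as generated.
expensive_model = cost_per_usable_response(inference_cost_per_call=0.010,
                                           usability_rate=0.80)   # $0.0125
cheap_model = cost_per_usable_response(inference_cost_per_call=0.002,
                                       usability_rate=0.10)       # $0.0200

print(f"expensive but usable:  ${expensive_model:.4f} per usable response")
print(f"cheap but rarely used: ${cheap_model:.4f} per usable response")

With these made-up numbers, the model that costs five times more per call is still cheaper per usable response; this mirrors the abstract's finding in spirit only, and the paper's actual framework, data, and results are in the linked publication.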