@inproceedings{tiwari-etal-2023-gatekeeper,
title = "Gatekeeper to save {COGS} and improve efficiency of Text Prediction",
author = "Tiwari, Nidhi and
Kola, Sneha and
Milunovic, Milos and
Chen, Si-qing and
Slavkovski, Marjan",
editor = "Wang, Mingxuan and
Zitouni, Imed",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-industry.5",
doi = "10.18653/v1/2023.emnlp-industry.5",
pages = "46--53",
abstract = "The text prediction (TP) workflow calls a Large Language Model (LLM), almost, after every character to get subsequent sequence of characters, till user accepts a suggestion. The confidence score of the prediction is commonly used for filtering the results to ensure that only correct predictions are shown to user. As LLMs require massive amounts of computation and storage, such an approach incurs network and high execution cost. So, we propose a Model gatekeeper (GK) to stop the LLM calls that will result in incorrect predictions at client application level itself. This way a GK can save cost of model inference and improve user experience by not showing the incorrect predictions. We demonstrate that use of a model gatekeeper saved approx 46.6{\%} of COGS for TP, at the cost of approx 4.5{\%} loss in character saving. Use of GK also improved the efficiency (suggestion rate) of TP model by 73{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tiwari-etal-2023-gatekeeper">
<titleInfo>
<title>Gatekeeper to save COGS and improve efficiency of Text Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nidhi</namePart>
<namePart type="family">Tiwari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sneha</namePart>
<namePart type="family">Kola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Milos</namePart>
<namePart type="family">Milunovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Si-qing</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marjan</namePart>
<namePart type="family">Slavkovski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mingxuan</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The text prediction (TP) workflow calls a Large Language Model (LLM) after almost every character to get the subsequent sequence of characters, until the user accepts a suggestion. The confidence score of the prediction is commonly used to filter the results so that only correct predictions are shown to the user. As LLMs require massive amounts of computation and storage, such an approach incurs high network and execution costs. We therefore propose a model gatekeeper (GK) to stop, at the client application level itself, the LLM calls that would result in incorrect predictions. This way, a GK can save the cost of model inference and improve the user experience by not showing incorrect predictions. We demonstrate that the use of a model gatekeeper saved approximately 46.6% of COGS for TP, at the cost of an approximately 4.5% loss in character savings. The use of the GK also improved the efficiency (suggestion rate) of the TP model by 73%.</abstract>
<identifier type="citekey">tiwari-etal-2023-gatekeeper</identifier>
<identifier type="doi">10.18653/v1/2023.emnlp-industry.5</identifier>
<location>
<url>https://aclanthology.org/2023.emnlp-industry.5</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>46</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gatekeeper to save COGS and improve efficiency of Text Prediction
%A Tiwari, Nidhi
%A Kola, Sneha
%A Milunovic, Milos
%A Chen, Si-qing
%A Slavkovski, Marjan
%Y Wang, Mingxuan
%Y Zitouni, Imed
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F tiwari-etal-2023-gatekeeper
%X The text prediction (TP) workflow calls a Large Language Model (LLM) after almost every character to get the subsequent sequence of characters, until the user accepts a suggestion. The confidence score of the prediction is commonly used to filter the results so that only correct predictions are shown to the user. As LLMs require massive amounts of computation and storage, such an approach incurs high network and execution costs. We therefore propose a model gatekeeper (GK) to stop, at the client application level itself, the LLM calls that would result in incorrect predictions. This way, a GK can save the cost of model inference and improve the user experience by not showing incorrect predictions. We demonstrate that the use of a model gatekeeper saved approximately 46.6% of COGS for TP, at the cost of an approximately 4.5% loss in character savings. The use of the GK also improved the efficiency (suggestion rate) of the TP model by 73%.
%R 10.18653/v1/2023.emnlp-industry.5
%U https://aclanthology.org/2023.emnlp-industry.5
%U https://doi.org/10.18653/v1/2023.emnlp-industry.5
%P 46-53
Markdown (Informal)
[Gatekeeper to save COGS and improve efficiency of Text Prediction](https://aclanthology.org/2023.emnlp-industry.5) (Tiwari et al., EMNLP 2023)
ACL
Nidhi Tiwari, Sneha Kola, Milos Milunovic, Si-qing Chen, and Marjan Slavkovski. 2023. Gatekeeper to save COGS and improve efficiency of Text Prediction. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 46–53, Singapore. Association for Computational Linguistics.
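
The abstract describes the core mechanism: a lightweight client-side model (the gatekeeper) scores the current text prefix and suppresses the expensive LLM round-trip when a prediction would likely be rejected. The following is a minimal sketch of that idea, not the paper's implementation; the `Gatekeeper` class, its features, the threshold, and the `call_llm` stub are all hypothetical names introduced here for illustration.

```python
# Minimal sketch of a client-side "gatekeeper" for text prediction.
# NOT the paper's implementation: the features, scorer, and threshold
# below are hypothetical, chosen only to illustrate the control flow.

from dataclasses import dataclass
from typing import Optional


@dataclass
class GatekeeperDecision:
    call_llm: bool
    score: float


class Gatekeeper:
    """Cheap local model predicting whether an LLM call is worthwhile."""

    def __init__(self, threshold: float = 0.5):
        # Tunable knob trading COGS savings against lost suggestions
        # (the paper reports such a trade-off: large cost savings for a
        # small loss in character savings).
        self.threshold = threshold

    def _features(self, prefix: str) -> list:
        # Hypothetical lightweight features computed on-device from the
        # current text prefix; a real gatekeeper would be trained.
        words = prefix.split()
        return [
            min(len(prefix) / 100.0, 1.0),         # prefix length
            min(len(words) / 20.0, 1.0),           # word count
            1.0 if prefix.endswith(" ") else 0.0,  # at a word boundary
        ]

    def score(self, prefix: str) -> float:
        # Stand-in scorer: a real system would use a small trained
        # classifier (e.g. logistic regression or a tiny distilled model).
        feats = self._features(prefix)
        return sum(feats) / len(feats)

    def decide(self, prefix: str) -> GatekeeperDecision:
        s = self.score(prefix)
        return GatekeeperDecision(call_llm=s >= self.threshold, score=s)


def call_llm(prefix: str) -> str:
    """Stub for the expensive server-side LLM completion call."""
    return "<completion>"


def on_keystroke(prefix: str, gk: Gatekeeper) -> Optional[str]:
    # Only pay for the network round-trip and inference when the
    # gatekeeper expects a suggestion the user is likely to accept.
    decision = gk.decide(prefix)
    if not decision.call_llm:
        return None  # skip the call: saves COGS, avoids a bad suggestion
    return call_llm(prefix)


if __name__ == "__main__":
    gk = Gatekeeper(threshold=0.5)
    for prefix in ["Th", "Thanks for your ", "Thanks for your email, I will "]:
        print(repr(prefix), "->", on_keystroke(prefix, gk))
```

Raising the threshold suppresses more calls (more COGS saved, fewer suggestions shown); lowering it does the opposite. This is one plausible reading of how the reported 46.6% COGS saving could trade against the 4.5% loss in character savings, though the paper's actual model and decision rule may differ.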