@inproceedings{suppa-etal-2021-cost,
title = "Cost-effective Deployment of {BERT} Models in Serverless Environment",
author = "Suppa, Marek and
Bene{\v{s}}ov{\'a}, Katar{\'\i}na and
{\v{S}}vec, Andrej",
editor = "Kim, Young-bum and
Li, Yunyao and
Rambow, Owen",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-industry.24",
doi = "10.18653/v1/2021.naacl-industry.24",
pages = "187--195",
abstract = "In this study, we demonstrate the viability of deploying BERT-style models to AWS Lambda in a production environment. Since the freely available pre-trained models are too large to be deployed in this environment, we utilize knowledge distillation and fine-tune the models on proprietary datasets for two real-world tasks: sentiment analysis and semantic textual similarity. As a result, we obtain models that are tuned for a specific domain and deployable in the serverless environment. The subsequent performance analysis shows that this solution does not only report latency levels acceptable for production use but that it is also a cost-effective alternative to small-to-medium size deployments of BERT models, all without any infrastructure overhead.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="suppa-etal-2021-cost">
<titleInfo>
<title>Cost-effective Deployment of BERT Models in Serverless Environment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Suppa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katarína</namePart>
<namePart type="family">Benešová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrej</namePart>
<namePart type="family">Švec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Young-bum</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we demonstrate the viability of deploying BERT-style models to AWS Lambda in a production environment. Since the freely available pre-trained models are too large to be deployed in this environment, we utilize knowledge distillation and fine-tune the models on proprietary datasets for two real-world tasks: sentiment analysis and semantic textual similarity. As a result, we obtain models that are tuned for a specific domain and deployable in the serverless environment. The subsequent performance analysis shows that this solution does not only report latency levels acceptable for production use but that it is also a cost-effective alternative to small-to-medium size deployments of BERT models, all without any infrastructure overhead.</abstract>
<identifier type="citekey">suppa-etal-2021-cost</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-industry.24</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-industry.24</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>187</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cost-effective Deployment of BERT Models in Serverless Environment
%A Suppa, Marek
%A Benešová, Katarína
%A Švec, Andrej
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F suppa-etal-2021-cost
%X In this study, we demonstrate the viability of deploying BERT-style models to AWS Lambda in a production environment. Since the freely available pre-trained models are too large to be deployed in this environment, we utilize knowledge distillation and fine-tune the models on proprietary datasets for two real-world tasks: sentiment analysis and semantic textual similarity. As a result, we obtain models that are tuned for a specific domain and deployable in the serverless environment. The subsequent performance analysis shows that this solution does not only report latency levels acceptable for production use but that it is also a cost-effective alternative to small-to-medium size deployments of BERT models, all without any infrastructure overhead.
%R 10.18653/v1/2021.naacl-industry.24
%U https://aclanthology.org/2021.naacl-industry.24
%U https://doi.org/10.18653/v1/2021.naacl-industry.24
%P 187-195
Markdown (Informal)
[Cost-effective Deployment of BERT Models in Serverless Environment](https://aclanthology.org/2021.naacl-industry.24) (Suppa et al., NAACL 2021)
ACL
- Marek Suppa, Katarína Benešová, and Andrej Švec. 2021. Cost-effective Deployment of BERT Models in Serverless Environment. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pages 187–195, Online. Association for Computational Linguistics.