@inproceedings{du-etal-2020-general,
title = "General Purpose Text Embeddings from Pre-trained Language Models for Scalable Inference",
author = "Du, Jingfei and
Ott, Myle and
Li, Haoran and
Zhou, Xing and
Stoyanov, Veselin",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.271",
doi = "10.18653/v1/2020.findings-emnlp.271",
pages = "3018--3030",
abstract = "The state of the art on many NLP tasks is currently achieved by large pre-trained language models, which require a considerable amount of computation. We aim to reduce the inference cost in a setting where many different predictions are made on a single piece of text. In that case, computational cost during inference can be amortized over the different predictions (tasks) using a shared text encoder. We compare approaches for training such an encoder and show that encoders pre-trained over multiple tasks generalize well to unseen tasks. We also compare ways of extracting fixed- and limited-size representations from this encoder, including pooling features extracted from multiple layers or positions. Our best approach compares favorably to knowledge distillation, achieving higher accuracy and lower computational cost once the system is handling around 7 tasks. Further, we show that through binary quantization, we can reduce the size of the extracted representations by a factor of 16 to store them for later use. The resulting method offers a compelling solution for using large-scale pre-trained models at a fraction of the computational cost when multiple tasks are performed on the same text.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="du-etal-2020-general">
<titleInfo>
<title>General Purpose Text Embeddings from Pre-trained Language Models for Scalable Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jingfei</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Myle</namePart>
<namePart type="family">Ott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoran</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veselin</namePart>
<namePart type="family">Stoyanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The state of the art on many NLP tasks is currently achieved by large pre-trained language models, which require a considerable amount of computation. We aim to reduce the inference cost in a setting where many different predictions are made on a single piece of text. In that case, computational cost during inference can be amortized over the different predictions (tasks) using a shared text encoder. We compare approaches for training such an encoder and show that encoders pre-trained over multiple tasks generalize well to unseen tasks. We also compare ways of extracting fixed- and limited-size representations from this encoder, including pooling features extracted from multiple layers or positions. Our best approach compares favorably to knowledge distillation, achieving higher accuracy and lower computational cost once the system is handling around 7 tasks. Further, we show that through binary quantization, we can reduce the size of the extracted representations by a factor of 16 to store them for later use. The resulting method offers a compelling solution for using large-scale pre-trained models at a fraction of the computational cost when multiple tasks are performed on the same text.</abstract>
<identifier type="citekey">du-etal-2020-general</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.271</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.271</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>3018</start>
<end>3030</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T General Purpose Text Embeddings from Pre-trained Language Models for Scalable Inference
%A Du, Jingfei
%A Ott, Myle
%A Li, Haoran
%A Zhou, Xing
%A Stoyanov, Veselin
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F du-etal-2020-general
%X The state of the art on many NLP tasks is currently achieved by large pre-trained language models, which require a considerable amount of computation. We aim to reduce the inference cost in a setting where many different predictions are made on a single piece of text. In that case, computational cost during inference can be amortized over the different predictions (tasks) using a shared text encoder. We compare approaches for training such an encoder and show that encoders pre-trained over multiple tasks generalize well to unseen tasks. We also compare ways of extracting fixed- and limited-size representations from this encoder, including pooling features extracted from multiple layers or positions. Our best approach compares favorably to knowledge distillation, achieving higher accuracy and lower computational cost once the system is handling around 7 tasks. Further, we show that through binary quantization, we can reduce the size of the extracted representations by a factor of 16 to store them for later use. The resulting method offers a compelling solution for using large-scale pre-trained models at a fraction of the computational cost when multiple tasks are performed on the same text.
%R 10.18653/v1/2020.findings-emnlp.271
%U https://aclanthology.org/2020.findings-emnlp.271
%U https://doi.org/10.18653/v1/2020.findings-emnlp.271
%P 3018-3030
Markdown (Informal)
[General Purpose Text Embeddings from Pre-trained Language Models for Scalable Inference](https://aclanthology.org/2020.findings-emnlp.271) (Du et al., Findings 2020)
ACL