@inproceedings{soares-etal-2023-nail,
    title = "{NAIL}: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders",
    author = "Soares, Livio and
      Gillick, Daniel and
      Cole, Jeremy and
      Kwiatkowski, Tom",
    editor = "Bouamor, Houda and
      Pino, Juan and
      Bali, Kalika",
    booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
    month = dec,
    year = "2023",
    address = "Singapore",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.emnlp-main.156",
    doi = "10.18653/v1/2023.emnlp-main.156",
    pages = "2574--2589",
    abstract = "Neural document rerankers are extremely effective in terms of accuracy. However, the best models require dedicated hardware for serving, which is costly and often not feasible. To avoid this serving-time requirement, we present a method of capturing up to 86{\%} of the gains of a Transformer cross-attention model with a lexicalized scoring function that only requires 10$^{-6}${\%} of the Transformer{'}s FLOPs per document and can be served using commodity CPUs. When combined with a BM25 retriever, this approach matches the quality of a state-of-the-art dual encoder retriever that still requires an accelerator for query encoding. We introduce NAIL (Non-Autoregressive Indexing with Language models) as a model architecture that is compatible with recent encoder-decoder and decoder-only large language models, such as T5, GPT-3, and PaLM. This model architecture can leverage existing pre-trained checkpoints and can be fine-tuned to efficiently construct document representations that do not require neural processing of queries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="soares-etal-2023-nail">
    <titleInfo>
      <title>NAIL: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Livio</namePart>
      <namePart type="family">Soares</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daniel</namePart>
      <namePart type="family">Gillick</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jeremy</namePart>
      <namePart type="family">Cole</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tom</namePart>
      <namePart type="family">Kwiatkowski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Neural document rerankers are extremely effective in terms of accuracy. However, the best models require dedicated hardware for serving, which is costly and often not feasible. To avoid this serving-time requirement, we present a method of capturing up to 86% of the gains of a Transformer cross-attention model with a lexicalized scoring function that only requires 10⁻⁶% of the Transformer’s FLOPs per document and can be served using commodity CPUs. When combined with a BM25 retriever, this approach matches the quality of a state-of-the-art dual encoder retriever that still requires an accelerator for query encoding. We introduce NAIL (Non-Autoregressive Indexing with Language models) as a model architecture that is compatible with recent encoder-decoder and decoder-only large language models, such as T5, GPT-3, and PaLM. This model architecture can leverage existing pre-trained checkpoints and can be fine-tuned to efficiently construct document representations that do not require neural processing of queries.</abstract>
    <identifier type="citekey">soares-etal-2023-nail</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.156</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.156</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>2574</start>
        <end>2589</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NAIL: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders
%A Soares, Livio
%A Gillick, Daniel
%A Cole, Jeremy
%A Kwiatkowski, Tom
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F soares-etal-2023-nail
%X Neural document rerankers are extremely effective in terms of accuracy. However, the best models require dedicated hardware for serving, which is costly and often not feasible. To avoid this serving-time requirement, we present a method of capturing up to 86% of the gains of a Transformer cross-attention model with a lexicalized scoring function that only requires 10⁻⁶% of the Transformer’s FLOPs per document and can be served using commodity CPUs. When combined with a BM25 retriever, this approach matches the quality of a state-of-the-art dual encoder retriever that still requires an accelerator for query encoding. We introduce NAIL (Non-Autoregressive Indexing with Language models) as a model architecture that is compatible with recent encoder-decoder and decoder-only large language models, such as T5, GPT-3, and PaLM. This model architecture can leverage existing pre-trained checkpoints and can be fine-tuned to efficiently construct document representations that do not require neural processing of queries.
%R 10.18653/v1/2023.emnlp-main.156
%U https://aclanthology.org/2023.emnlp-main.156
%U https://doi.org/10.18653/v1/2023.emnlp-main.156
%P 2574-2589
Markdown (Informal)
[NAIL: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders](https://aclanthology.org/2023.emnlp-main.156) (Soares et al., EMNLP 2023)
ACL
Livio Soares, Daniel Gillick, Jeremy Cole, and Tom Kwiatkowski. 2023. NAIL: Lexical Retrieval Indices with Efficient Non-Autoregressive Decoders. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 2574–2589, Singapore. Association for Computational Linguistics.