@article{meister-etal-2020-best,
title = "Best-First Beam Search",
author = "Meister, Clara and
Vieira, Tim and
Cotterell, Ryan",
editor = "Johnson, Mark and
Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "8",
year = "2020",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2020.tacl-1.51",
doi = "10.1162/tacl_a_00346",
pages = "795--809",
abstract = "Decoding for many NLP tasks requires an effective heuristic algorithm for approximating exact search because the problem of searching the full output space is often intractable, or impractical in many settings. The default algorithm for this job is beam search{---}a pruned version of breadth-first search. Quite surprisingly, beam search often returns better results than exact inference due to beneficial search bias for NLP tasks. In this work, we show that the standard implementation of beam search can be made up to 10x faster in practice. Our method assumes that the scoring function is monotonic in the sequence length, which allows us to safely prune hypotheses that cannot be in the final set of hypotheses early on. We devise effective monotonic approximations to popular nonmonontic scoring functions, including length normalization and mutual information decoding. Lastly, we propose a memory-reduced variant of best-first beam search, which has a similar beneficial search bias in terms of downstream performance, but runs in a fraction of the time.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="meister-etal-2020-best">
<titleInfo>
<title>Best-First Beam Search</title>
</titleInfo>
<name type="personal">
<namePart type="given">Clara</namePart>
<namePart type="family">Meister</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Vieira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Decoding for many NLP tasks requires an effective heuristic algorithm for approximating exact search because the problem of searching the full output space is often intractable, or impractical in many settings. The default algorithm for this job is beam search—a pruned version of breadth-first search. Quite surprisingly, beam search often returns better results than exact inference due to beneficial search bias for NLP tasks. In this work, we show that the standard implementation of beam search can be made up to 10x faster in practice. Our method assumes that the scoring function is monotonic in the sequence length, which allows us to safely prune hypotheses that cannot be in the final set of hypotheses early on. We devise effective monotonic approximations to popular nonmonontic scoring functions, including length normalization and mutual information decoding. Lastly, we propose a memory-reduced variant of best-first beam search, which has a similar beneficial search bias in terms of downstream performance, but runs in a fraction of the time.</abstract>
<identifier type="citekey">meister-etal-2020-best</identifier>
<identifier type="doi">10.1162/tacl_a_00346</identifier>
<location>
<url>https://aclanthology.org/2020.tacl-1.51</url>
</location>
<part>
<date>2020</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>795</start>
<end>809</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Best-First Beam Search
%A Meister, Clara
%A Vieira, Tim
%A Cotterell, Ryan
%J Transactions of the Association for Computational Linguistics
%D 2020
%V 8
%I MIT Press
%C Cambridge, MA
%F meister-etal-2020-best
%X Decoding for many NLP tasks requires an effective heuristic algorithm for approximating exact search because the problem of searching the full output space is often intractable, or impractical in many settings. The default algorithm for this job is beam search—a pruned version of breadth-first search. Quite surprisingly, beam search often returns better results than exact inference due to beneficial search bias for NLP tasks. In this work, we show that the standard implementation of beam search can be made up to 10x faster in practice. Our method assumes that the scoring function is monotonic in the sequence length, which allows us to safely prune hypotheses that cannot be in the final set of hypotheses early on. We devise effective monotonic approximations to popular nonmonontic scoring functions, including length normalization and mutual information decoding. Lastly, we propose a memory-reduced variant of best-first beam search, which has a similar beneficial search bias in terms of downstream performance, but runs in a fraction of the time.
%R 10.1162/tacl_a_00346
%U https://aclanthology.org/2020.tacl-1.51
%U https://doi.org/10.1162/tacl_a_00346
%P 795-809
Markdown (Informal)
[Best-First Beam Search](https://aclanthology.org/2020.tacl-1.51) (Meister et al., TACL 2020)
ACL
- Clara Meister, Tim Vieira, and Ryan Cotterell. 2020. Best-First Beam Search. Transactions of the Association for Computational Linguistics, 8:795–809.