@inproceedings{bertsch-etal-2023-mbr,
title = "It{'}s {MBR} All the Way Down: Modern Generation Techniques Through the Lens of Minimum {B}ayes Risk",
author = "Bertsch, Amanda and
Xie, Alex and
Neubig, Graham and
Gormley, Matthew",
editor = "Elazar, Yanai and
Ettinger, Allyson and
Kassner, Nora and
Ruder, Sebastian and
Smith, Noah A.",
booktitle = "Proceedings of the Big Picture Workshop",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bigpicture-1.9",
doi = "10.18653/v1/2023.bigpicture-1.9",
pages = "108--122",
abstract = "Minimum Bayes Risk (MBR) decoding is a method for choosing the outputs of a machine learning system based not on the output with the highest probability, but the output with the lowest risk (expected error) among multiple candidates. It is a simple but powerful method: for an additional cost at inference time, MBR provides reliable several-point improvements across metrics for a wide variety of tasks without any additional data or training. Despite this, MBR is not frequently applied in NLP works, and knowledge of the method itself is limited. We first provide an introduction to the method and the recent literature. We show that several recent methods that do not reference MBR can be written as special cases of MBR; this reformulation provides additional theoretical justification for the performance of these methods, explaining some results that were previously only empirical. We provide theoretical and empirical results about the effectiveness of various MBR variants and make concrete recommendations for the application of MBR in NLP models, including future directions in this area.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bertsch-etal-2023-mbr">
<titleInfo>
<title>It’s MBR All the Way Down: Modern Generation Techniques Through the Lens of Minimum Bayes Risk</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Bertsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Gormley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Big Picture Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Allyson</namePart>
<namePart type="family">Ettinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Kassner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noah</namePart>
<namePart type="family">A. Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Minimum Bayes Risk (MBR) decoding is a method for choosing the outputs of a machine learning system based not on the output with the highest probability, but the output with the lowest risk (expected error) among multiple candidates. It is a simple but powerful method: for an additional cost at inference time, MBR provides reliable several-point improvements across metrics for a wide variety of tasks without any additional data or training. Despite this, MBR is not frequently applied in NLP works, and knowledge of the method itself is limited. We first provide an introduction to the method and the recent literature. We show that several recent methods that do not reference MBR can be written as special cases of MBR; this reformulation provides additional theoretical justification for the performance of these methods, explaining some results that were previously only empirical. We provide theoretical and empirical results about the effectiveness of various MBR variants and make concrete recommendations for the application of MBR in NLP models, including future directions in this area.</abstract>
<identifier type="citekey">bertsch-etal-2023-mbr</identifier>
<identifier type="doi">10.18653/v1/2023.bigpicture-1.9</identifier>
<location>
<url>https://aclanthology.org/2023.bigpicture-1.9</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>108</start>
<end>122</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T It’s MBR All the Way Down: Modern Generation Techniques Through the Lens of Minimum Bayes Risk
%A Bertsch, Amanda
%A Xie, Alex
%A Neubig, Graham
%A Gormley, Matthew
%Y Elazar, Yanai
%Y Ettinger, Allyson
%Y Kassner, Nora
%Y Ruder, Sebastian
%Y Smith, Noah A.
%S Proceedings of the Big Picture Workshop
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F bertsch-etal-2023-mbr
%X Minimum Bayes Risk (MBR) decoding is a method for choosing the outputs of a machine learning system based not on the output with the highest probability, but the output with the lowest risk (expected error) among multiple candidates. It is a simple but powerful method: for an additional cost at inference time, MBR provides reliable several-point improvements across metrics for a wide variety of tasks without any additional data or training. Despite this, MBR is not frequently applied in NLP works, and knowledge of the method itself is limited. We first provide an introduction to the method and the recent literature. We show that several recent methods that do not reference MBR can be written as special cases of MBR; this reformulation provides additional theoretical justification for the performance of these methods, explaining some results that were previously only empirical. We provide theoretical and empirical results about the effectiveness of various MBR variants and make concrete recommendations for the application of MBR in NLP models, including future directions in this area.
%R 10.18653/v1/2023.bigpicture-1.9
%U https://aclanthology.org/2023.bigpicture-1.9
%U https://doi.org/10.18653/v1/2023.bigpicture-1.9
%P 108-122
Markdown (Informal)
[It’s MBR All the Way Down: Modern Generation Techniques Through the Lens of Minimum Bayes Risk](https://aclanthology.org/2023.bigpicture-1.9) (Bertsch et al., BigPicture 2023)
ACL
Amanda Bertsch, Alex Xie, Graham Neubig, and Matthew Gormley. 2023. It’s MBR All the Way Down: Modern Generation Techniques Through the Lens of Minimum Bayes Risk. In Proceedings of the Big Picture Workshop, pages 108–122, Singapore. Association for Computational Linguistics.
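
As a supplement to the abstract's description: MBR decoding samples several candidate outputs and returns the one with the lowest expected error (equivalently, the highest expected utility) when each candidate is scored against the others as pseudo-references. Below is a minimal, hypothetical sketch of that selection rule; the difflib-based utility function and the toy candidate list are illustrative assumptions, not the paper's metrics or experimental setup.

```python
# Hypothetical sketch of Minimum Bayes Risk (MBR) decoding as described in the
# abstract: among sampled candidates, pick the one with the lowest expected
# error (highest expected utility) against the other candidates, which serve
# as pseudo-references drawn from the model's distribution.
from difflib import SequenceMatcher


def utility(hypothesis: str, reference: str) -> float:
    """Stand-in utility in [0, 1]; real MBR uses a task metric (e.g. BLEU, chrF)."""
    return SequenceMatcher(None, hypothesis, reference).ratio()


def mbr_decode(candidates: list[str]) -> str:
    """Return the candidate with the highest average utility against the rest."""
    if len(candidates) < 2:
        return candidates[0]

    def expected_utility(i: int) -> float:
        # Average utility of candidate i scored against every other candidate.
        others = [c for j, c in enumerate(candidates) if j != i]
        return sum(utility(candidates[i], ref) for ref in others) / len(others)

    best = max(range(len(candidates)), key=expected_utility)
    return candidates[best]


# Toy usage: the "consensus" candidate wins even if it was not the single
# most probable sample under the model.
samples = [
    "the cat sat on the mat",
    "the cat sat on a mat",
    "a dog ran in the park",
]
print(mbr_decode(samples))  # -> one of the two mutually similar "cat" candidates
```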