% Conference paper in the EAMT 2023 proceedings; record at aclanthology.org/2023.eamt-1.10.
@inproceedings{he-etal-2023-empirical,
  title     = {Empirical Analysis of Beam Search Curse and Search Errors with Model Errors in Neural Machine Translation},
  author    = {He, Jianfei and
               Sun, Shichao and
               Jia, Xiaohua and
               Li, Wenjie},
  editor    = {Nurminen, Mary and
               Brenner, Judith and
               Koponen, Maarit and
               Latomaa, Sirkku and
               Mikhailov, Mikhail and
               Schierl, Frederike and
               Ranasinghe, Tharindu and
               Vanmassenhove, Eva and
               Vidal, Sergi Alvarez and
               Aranberri, Nora and
               Nunziatini, Mara and
               Escart{\'\i}n, Carla Parra and
               Forcada, Mikel and
               Popovic, Maja and
               Scarton, Carolina and
               Moniz, Helena},
  booktitle = {Proceedings of the 24th Annual Conference of the European Association for Machine Translation},
  month     = jun,
  year      = {2023},
  address   = {Tampere, Finland},
  publisher = {European Association for Machine Translation},
  url       = {https://aclanthology.org/2023.eamt-1.10},
  pages     = {91--101},
  abstract  = {Beam search is the most popular decoding method for Neural Machine Translation (NMT) and is still a strong baseline compared with the newly proposed sampling-based methods. To better understand beam search, we investigate its two well-recognized issues, beam search curse and search errors, at the sentence level. We find that only less than 30{\%} of sentences in the test set experience these issues. Meanwhile, there is a related phenomenon. For the majority of sentences, their gold references have lower probabilities than the predictions from beam search. We also test with different levels of model errors including a special test using training samples and models without regularization. We find that these phenomena still exist even for a model with an accuracy of 95{\%} although they are mitigated. These findings show that it is not promising to improve beam search by seeking higher probabilities in searching and further reducing its search errors. The relationship between the quality and the probability of predictions at the sentence level in our results provides useful information to find new ways to improve NMT.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "he-etal-2023-empirical". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="he-etal-2023-empirical">
    <titleInfo>
      <title>Empirical Analysis of Beam Search Curse and Search Errors with Model Errors in Neural Machine Translation</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Jianfei</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shichao</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaohua</namePart>
      <namePart type="family">Jia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenjie</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 24th Annual Conference of the European Association for Machine Translation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mary</namePart>
        <namePart type="family">Nurminen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Judith</namePart>
        <namePart type="family">Brenner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maarit</namePart>
        <namePart type="family">Koponen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sirkku</namePart>
        <namePart type="family">Latomaa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mikhail</namePart>
        <namePart type="family">Mikhailov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Frederike</namePart>
        <namePart type="family">Schierl</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tharindu</namePart>
        <namePart type="family">Ranasinghe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eva</namePart>
        <namePart type="family">Vanmassenhove</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sergi</namePart>
        <namePart type="given">Alvarez</namePart>
        <namePart type="family">Vidal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nora</namePart>
        <namePart type="family">Aranberri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mara</namePart>
        <namePart type="family">Nunziatini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carla</namePart>
        <namePart type="given">Parra</namePart>
        <namePart type="family">Escartín</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mikel</namePart>
        <namePart type="family">Forcada</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maja</namePart>
        <namePart type="family">Popovic</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolina</namePart>
        <namePart type="family">Scarton</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Moniz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Association for Machine Translation</publisher>
        <place>
          <placeTerm type="text">Tampere, Finland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Beam search is the most popular decoding method for Neural Machine Translation (NMT) and is still a strong baseline compared with the newly proposed sampling-based methods. To better understand beam search, we investigate its two well-recognized issues, beam search curse and search errors, at the sentence level. We find that only less than 30% of sentences in the test set experience these issues. Meanwhile, there is a related phenomenon. For the majority of sentences, their gold references have lower probabilities than the predictions from beam search. We also test with different levels of model errors including a special test using training samples and models without regularization. We find that these phenomena still exist even for a model with an accuracy of 95% although they are mitigated. These findings show that it is not promising to improve beam search by seeking higher probabilities in searching and further reducing its search errors. The relationship between the quality and the probability of predictions at the sentence level in our results provides useful information to find new ways to improve NMT.</abstract>
    <identifier type="citekey">he-etal-2023-empirical</identifier>
    <location>
      <url>https://aclanthology.org/2023.eamt-1.10</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2023-06</date>
      <extent unit="page">
        <start>91</start>
        <end>101</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Empirical Analysis of Beam Search Curse and Search Errors with Model Errors in Neural Machine Translation
%A He, Jianfei
%A Sun, Shichao
%A Jia, Xiaohua
%A Li, Wenjie
%Y Nurminen, Mary
%Y Brenner, Judith
%Y Koponen, Maarit
%Y Latomaa, Sirkku
%Y Mikhailov, Mikhail
%Y Schierl, Frederike
%Y Ranasinghe, Tharindu
%Y Vanmassenhove, Eva
%Y Vidal, Sergi Alvarez
%Y Aranberri, Nora
%Y Nunziatini, Mara
%Y Escartín, Carla Parra
%Y Forcada, Mikel
%Y Popovic, Maja
%Y Scarton, Carolina
%Y Moniz, Helena
%S Proceedings of the 24th Annual Conference of the European Association for Machine Translation
%D 2023
%8 June
%I European Association for Machine Translation
%C Tampere, Finland
%F he-etal-2023-empirical
%X Beam search is the most popular decoding method for Neural Machine Translation (NMT) and is still a strong baseline compared with the newly proposed sampling-based methods. To better understand beam search, we investigate its two well-recognized issues, beam search curse and search errors, at the sentence level. We find that only less than 30% of sentences in the test set experience these issues. Meanwhile, there is a related phenomenon. For the majority of sentences, their gold references have lower probabilities than the predictions from beam search. We also test with different levels of model errors including a special test using training samples and models without regularization. We find that these phenomena still exist even for a model with an accuracy of 95% although they are mitigated. These findings show that it is not promising to improve beam search by seeking higher probabilities in searching and further reducing its search errors. The relationship between the quality and the probability of predictions at the sentence level in our results provides useful information to find new ways to improve NMT.
%U https://aclanthology.org/2023.eamt-1.10
%P 91-101
Markdown (Informal)
[Empirical Analysis of Beam Search Curse and Search Errors with Model Errors in Neural Machine Translation](https://aclanthology.org/2023.eamt-1.10) (He et al., EAMT 2023)
ACL