@inproceedings{gareev-etal-2024-local,
title = "Local and Global Decoding in Text Generation",
author = "Gareev, Daniel and
Hofmann, Thomas and
Krishnasamy, Ezhilmathi and
Pimentel, Tiago",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.854",
pages = "14577--14597",
abstract = "Text generation, a component in applications such as dialogue systems, relies heavily on decoding algorithms that sample strings from a language model distribution. Traditional methods like top-$k$ and top-$\pi$ decoding locally normalise the model{'}s output, which can significantly distort the original distribution. In this paper, we investigate the effects of such distortions by introducing globally-normalised versions of these decoding methods. Further, we propose an independent Metropolis-Hastings (IMH) algorithm to approximate sampling from these globally-normalised distributions without explicitly computing them. Our empirical analyses compare the performance of local and global decoding across two algorithms (top-$k$ and top-$\pi$) with various hyperparameters, using the Pythia language models. Results show that in most configuration, global decoding performs worse than the local decoding versions of the same algorithms, despite preserving the distribution{'}s integrity. Our results thus suggest that distortion might be an important feature of local decoding algorithms.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gareev-etal-2024-local">
<titleInfo>
<title>Local and Global Decoding in Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Gareev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Hofmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ezhilmathi</namePart>
<namePart type="family">Krishnasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="family">Pimentel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text generation, a component in applications such as dialogue systems, relies heavily on decoding algorithms that sample strings from a language model distribution. Traditional methods like top-k and top-π decoding locally normalise the model’s output, which can significantly distort the original distribution. In this paper, we investigate the effects of such distortions by introducing globally-normalised versions of these decoding methods. Further, we propose an independent Metropolis-Hastings (IMH) algorithm to approximate sampling from these globally-normalised distributions without explicitly computing them. Our empirical analyses compare the performance of local and global decoding across two algorithms (top-k and top-π) with various hyperparameters, using the Pythia language models. Results show that in most configurations, global decoding performs worse than the local decoding versions of the same algorithms, despite preserving the distribution’s integrity. Our results thus suggest that distortion might be an important feature of local decoding algorithms.</abstract>
<identifier type="citekey">gareev-etal-2024-local</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.854</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14577</start>
<end>14597</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Local and Global Decoding in Text Generation
%A Gareev, Daniel
%A Hofmann, Thomas
%A Krishnasamy, Ezhilmathi
%A Pimentel, Tiago
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F gareev-etal-2024-local
%X Text generation, a component in applications such as dialogue systems, relies heavily on decoding algorithms that sample strings from a language model distribution. Traditional methods like top-k and top-π decoding locally normalise the model’s output, which can significantly distort the original distribution. In this paper, we investigate the effects of such distortions by introducing globally-normalised versions of these decoding methods. Further, we propose an independent Metropolis-Hastings (IMH) algorithm to approximate sampling from these globally-normalised distributions without explicitly computing them. Our empirical analyses compare the performance of local and global decoding across two algorithms (top-k and top-π) with various hyperparameters, using the Pythia language models. Results show that in most configurations, global decoding performs worse than the local decoding versions of the same algorithms, despite preserving the distribution’s integrity. Our results thus suggest that distortion might be an important feature of local decoding algorithms.
%U https://aclanthology.org/2024.findings-emnlp.854
%P 14577-14597
Markdown (Informal)
[Local and Global Decoding in Text Generation](https://aclanthology.org/2024.findings-emnlp.854) (Gareev et al., Findings 2024)
ACL
- Daniel Gareev, Thomas Hofmann, Ezhilmathi Krishnasamy, and Tiago Pimentel. 2024. Local and Global Decoding in Text Generation. In Findings of the Association for Computational Linguistics: EMNLP 2024, pages 14577–14597, Miami, Florida, USA. Association for Computational Linguistics.
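For readers skimming the abstract, a minimal sketch of the two ideas it contrasts may help. The snippet below is illustrative only, not the authors' implementation: it shows locally-normalised top-k sampling (truncate the next-token distribution, then renormalise at every step) and one transition of an independent Metropolis-Hastings sampler whose acceptance ratio targets the globally-normalised distribution without ever computing its normalisation constant. The names `probs`, `log_p`, and `log_q` are assumptions for the sketch; in the paper the proposal and target come from Pythia language models.

```python
# Minimal sketch (not the authors' code) of local vs. global decoding.
# Assumes a toy next-token distribution; in practice `probs` would come
# from a language model such as Pythia.
import numpy as np

rng = np.random.default_rng(0)

def top_k_local(probs, k):
    """Locally-normalised top-k sampling: keep the k most probable tokens
    and rescale their mass to 1. This per-step renormalisation is the
    'distortion' the paper studies."""
    idx = np.argsort(probs)[::-1][:k]          # indices of the k largest probs
    p = probs[idx] / probs[idx].sum()          # renormalise the truncated mass
    return rng.choice(idx, p=p)

def imh_step(current, proposal_sampler, log_p, log_q):
    """One independent Metropolis-Hastings transition. `proposal_sampler`
    draws a full string from the locally-normalised decoder; `log_p` is the
    string's (unnormalised) log-probability under the model restricted to
    the proposal's support, and `log_q` its log-probability under the local
    proposal. All three names are hypothetical placeholders."""
    candidate = proposal_sampler()
    # Independence-sampler acceptance ratio:
    #   a = min(1, [p(x') q(x)] / [p(x) q(x')])
    # i.e. a ratio of importance weights p/q, so the global normaliser cancels.
    log_a = (log_p(candidate) - log_q(candidate)) - (log_p(current) - log_q(current))
    if np.log(rng.random()) < min(0.0, log_a):
        return candidate
    return current

# Tiny demo of the local step: only tokens 0 and 1 can ever be drawn,
# with their joint mass of 0.8 rescaled to 1.
probs = np.array([0.5, 0.3, 0.1, 0.06, 0.04])
print(top_k_local(probs, k=2))
```

Because the proposal here is the local decoder itself, the acceptance ratio reduces to a ratio of importance weights, which is exactly what lets IMH approximate the globally-normalised distribution without explicit normalisation.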