@inproceedings{de-langis-etal-2024-dynamic,
title = "Dynamic Multi-Reward Weighting for Multi-Style Controllable Generation",
author = "De Langis, Karin and
Koo, Ryan and
Kang, Dongyeop",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.386",
pages = "6783--6800",
abstract = "Textual style expresses a diverse set of information, including interpersonal dynamics (e.g., formality) and the author{'}s emotions or attitudes (e.g., disgust). An open question is how language models can be explicitly controlled so that they weave together target styles when generating text: for example, to produce text that is both negative and non-toxic. One approach to such controlled generation is multi-objective reinforcement learning (RL), but how to best combine multiple objectives in a reward function is an open question. In this paper, we investigate various formulations of multi-style reward formulations, including calibrated outputs from discriminators and dynamic weighting by discriminator gradient magnitudes. We find that our proposed dynamic weighting outperforms static weighting approaches with respect style control while maintaining linguistic quality, and we explore its effectiveness in 2- and 3-style control.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-langis-etal-2024-dynamic">
<titleInfo>
<title>Dynamic Multi-Reward Weighting for Multi-Style Controllable Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">De Langis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Koo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongyeop</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Textual style expresses a diverse set of information, including interpersonal dynamics (e.g., formality) and the author’s emotions or attitudes (e.g., disgust). An open question is how language models can be explicitly controlled so that they weave together target styles when generating text: for example, to produce text that is both negative and non-toxic. One approach to such controlled generation is multi-objective reinforcement learning (RL), but how to best combine multiple objectives in a reward function is an open question. In this paper, we investigate various formulations of multi-style reward formulations, including calibrated outputs from discriminators and dynamic weighting by discriminator gradient magnitudes. We find that our proposed dynamic weighting outperforms static weighting approaches with respect style control while maintaining linguistic quality, and we explore its effectiveness in 2- and 3-style control.</abstract>
<identifier type="citekey">de-langis-etal-2024-dynamic</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.386</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>6783</start>
<end>6800</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dynamic Multi-Reward Weighting for Multi-Style Controllable Generation
%A De Langis, Karin
%A Koo, Ryan
%A Kang, Dongyeop
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F de-langis-etal-2024-dynamic
%X Textual style expresses a diverse set of information, including interpersonal dynamics (e.g., formality) and the author’s emotions or attitudes (e.g., disgust). An open question is how language models can be explicitly controlled so that they weave together target styles when generating text: for example, to produce text that is both negative and non-toxic. One approach to such controlled generation is multi-objective reinforcement learning (RL), but how to best combine multiple objectives in a reward function is an open question. In this paper, we investigate various formulations of multi-style reward formulations, including calibrated outputs from discriminators and dynamic weighting by discriminator gradient magnitudes. We find that our proposed dynamic weighting outperforms static weighting approaches with respect style control while maintaining linguistic quality, and we explore its effectiveness in 2- and 3-style control.
%U https://aclanthology.org/2024.emnlp-main.386
%P 6783-6800
Markdown (Informal)
[Dynamic Multi-Reward Weighting for Multi-Style Controllable Generation](https://aclanthology.org/2024.emnlp-main.386) (De Langis et al., EMNLP 2024)
ACL