@inproceedings{zhang-jaitly-2025-sage,
title = "{SAGE}: Steering Dialog Generation with Future-Aware State-Action Augmentation",
author = "Zhang, Yizhe and
Jaitly, Navdeep",
editor = "Abercrombie, Gavin and
Basile, Valerio and
Frenda, Simona and
Tonelli, Sara and
Dudy, Shiran",
booktitle = "Proceedings of the 4th Workshop on Perspectivist Approaches to NLP",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nlperspectives-1.11/",
pages = "123--132",
ISBN = "979-8-89176-350-0",
abstract = "Recent advances in large language models have enabled impressive task-oriented applications, yet building emotionally intelligent chatbots for natural, strategic conversations remains challenging. Current approaches often assume a single ``ground truth'' for emotional responses, overlooking the subjectivity of human emotion. We present a novel perspectivist approach, SAGE, that models multiple perspectives in dialogue generation using latent variables. At its core is the State-Action Chain (SAC), which augments standard fine-tuning with latent variables capturing diverse emotional states and conversational strategies between turns, in a future-looking manner. During inference, these variables are generated before each response, enabling multi-perspective control while preserving natural interactions. We also introduce a self-improvement pipeline combining dialogue tree search, LLM-based reward modeling, and targeted fine-tuning to optimize conversational trajectories. Experiments show improved LLM-based judgments while maintaining strong general LLM performance. The discrete latent variables further enable search-based strategies and open avenues for state-level reinforcement learning in dialogue systems, where learning can occur at the state level rather than the token level."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-jaitly-2025-sage">
<titleInfo>
<title>SAGE: Steering Dialog Generation with Future-Aware State-Action Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yizhe</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Navdeep</namePart>
<namePart type="family">Jaitly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Perspectivist Approaches to NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gavin</namePart>
<namePart type="family">Abercrombie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Frenda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiran</namePart>
<namePart type="family">Dudy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-350-0</identifier>
</relatedItem>
<abstract>Recent advances in large language models have enabled impressive task-oriented applications, yet building emotionally intelligent chatbots for natural, strategic conversations remains challenging. Current approaches often assume a single “ground truth” for emotional responses, overlooking the subjectivity of human emotion. We present a novel perspectivist approach, SAGE, that models multiple perspectives in dialogue generation using latent variables. At its core is the State-Action Chain (SAC), which augments standard fine-tuning with latent variables capturing diverse emotional states and conversational strategies between turns, in a future-looking manner. During inference, these variables are generated before each response, enabling multi-perspective control while preserving natural interactions. We also introduce a self-improvement pipeline combining dialogue tree search, LLM-based reward modeling, and targeted fine-tuning to optimize conversational trajectories. Experiments show improved LLM-based judgments while maintaining strong general LLM performance. The discrete latent variables further enable search-based strategies and open avenues for state-level reinforcement learning in dialogue systems, where learning can occur at the state level rather than the token level.</abstract>
<identifier type="citekey">zhang-jaitly-2025-sage</identifier>
<location>
<url>https://aclanthology.org/2025.nlperspectives-1.11/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>123</start>
<end>132</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SAGE: Steering Dialog Generation with Future-Aware State-Action Augmentation
%A Zhang, Yizhe
%A Jaitly, Navdeep
%Y Abercrombie, Gavin
%Y Basile, Valerio
%Y Frenda, Simona
%Y Tonelli, Sara
%Y Dudy, Shiran
%S Proceedings of the 4th Workshop on Perspectivist Approaches to NLP
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-350-0
%F zhang-jaitly-2025-sage
%X Recent advances in large language models have enabled impressive task-oriented applications, yet building emotionally intelligent chatbots for natural, strategic conversations remains challenging. Current approaches often assume a single “ground truth” for emotional responses, overlooking the subjectivity of human emotion. We present a novel perspectivist approach, SAGE, that models multiple perspectives in dialogue generation using latent variables. At its core is the State-Action Chain (SAC), which augments standard fine-tuning with latent variables capturing diverse emotional states and conversational strategies between turns, in a future-looking manner. During inference, these variables are generated before each response, enabling multi-perspective control while preserving natural interactions. We also introduce a self-improvement pipeline combining dialogue tree search, LLM-based reward modeling, and targeted fine-tuning to optimize conversational trajectories. Experiments show improved LLM-based judgments while maintaining strong general LLM performance. The discrete latent variables further enable search-based strategies and open avenues for state-level reinforcement learning in dialogue systems, where learning can occur at the state level rather than the token level.
%U https://aclanthology.org/2025.nlperspectives-1.11/
%P 123-132
Markdown (Informal)
[SAGE: Steering Dialog Generation with Future-Aware State-Action Augmentation](https://aclanthology.org/2025.nlperspectives-1.11/) (Zhang & Jaitly, NLPerspectives 2025)
ACL