@inproceedings{wang-etal-2025-battling,
title = "Battling against Tough Resister: Strategy Planning with Adversarial Game for Non-collaborative Dialogues",
author = "Wang, Haiyang and
Tian, Zhiliang and
Pan, Yuchen and
Song, Xin and
Niu, Xin and
Huang, Minlie and
Zhou, Bin",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.184/",
doi = "10.18653/v1/2025.acl-long.184",
pages = "3665--3685",
ISBN = "979-8-89176-251-0",
abstract = "Non-collaborative dialogue involves two participants with conflicting interests engaging in a multi-round dialogue to achieve their own goals. Strategy planning is the key to guiding both participants towards a consensus. Most LLMs-based methods use stimulus prompts or external strategy planners for strategy planning. However, stimulus prompts fail to teach LLMs to plan dialogue strategies explicitly. Moreover, training external strategy planners doesn{'}t fully account for adversarial interactions, thereby limiting their effectiveness against tough resisters. In this paper, to mitigate the above issues, we propose $\textbf{GAIA}$, a $\textbf{G}$ame-based $\textbf{A}$dversarial self-play $\textbf{I}$nter$\textbf{A}$ctive training paradigm, which constructs an adversarial two-player (a persuader and a resister) zero-sum game and guides the game to approximate Nash Equilibrium (NE) via reinforcement learning (RL) for the non-collaborative dialogues. First, we design a Chain-of-Mind prompt to reason the resister{'}s dialogue act step-by-step to plan the persuasive strategies. Secondly, to adversarially improve the persuader, we construct diverse resistant planners and theoretically improve the persuader{'}s optimal lower bound. Finally, we iteratively optimise their policies via adversarial self-play interactive RL and design an $\epsilon$-NE verification algorithm to approximate the game{'}s NE. Experiments on three datasets show that our model obtains state-of-the-art performance."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-battling">
<titleInfo>
<title>Battling against Tough Resister: Strategy Planning with Adversarial Game for Non-collaborative Dialogues</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haiyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiliang</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuchen</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Niu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minlie</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Non-collaborative dialogue involves two participants with conflicting interests engaging in a multi-round dialogue to achieve their own goals. Strategy planning is the key to guiding both participants towards a consensus. Most LLMs-based methods use stimulus prompts or external strategy planners for strategy planning. However, stimulus prompts fail to teach LLMs to plan dialogue strategies explicitly. Moreover, training external strategy planners doesn’t fully account for adversarial interactions, thereby limiting their effectiveness against tough resisters. In this paper, to mitigate the above issues, we propose GAIA, a Game-based Adversarial self-play InterActive training paradigm, which constructs an adversarial two-player (a persuader and a resister) zero-sum game and guides the game to approximate Nash Equilibrium (NE) via reinforcement learning (RL) for the non-collaborative dialogues. First, we design a Chain-of-Mind prompt to reason the resister’s dialogue act step-by-step to plan the persuasive strategies. Secondly, to adversarially improve the persuader, we construct diverse resistant planners and theoretically improve the persuader’s optimal lower bound. Finally, we iteratively optimise their policies via adversarial self-play interactive RL and design an ε-NE verification algorithm to approximate the game’s NE. Experiments on three datasets show that our model obtains state-of-the-art performance.</abstract>
<identifier type="citekey">wang-etal-2025-battling</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.184</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.184/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>3665</start>
<end>3685</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Battling against Tough Resister: Strategy Planning with Adversarial Game for Non-collaborative Dialogues
%A Wang, Haiyang
%A Tian, Zhiliang
%A Pan, Yuchen
%A Song, Xin
%A Niu, Xin
%A Huang, Minlie
%A Zhou, Bin
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F wang-etal-2025-battling
%X Non-collaborative dialogue involves two participants with conflicting interests engaging in a multi-round dialogue to achieve their own goals. Strategy planning is the key to guiding both participants towards a consensus. Most LLM-based methods use stimulus prompts or external strategy planners for strategy planning. However, stimulus prompts fail to teach LLMs to plan dialogue strategies explicitly. Moreover, training external strategy planners doesn’t fully account for adversarial interactions, thereby limiting their effectiveness against tough resisters. In this paper, to mitigate the above issues, we propose GAIA, a Game-based Adversarial self-play InterActive training paradigm, which constructs an adversarial two-player (a persuader and a resister) zero-sum game and guides the game to approximate a Nash Equilibrium (NE) via reinforcement learning (RL) for non-collaborative dialogues. First, we design a Chain-of-Mind prompt to reason about the resister’s dialogue act step by step and plan persuasive strategies. Second, to adversarially improve the persuader, we construct diverse resistant planners and theoretically improve the persuader’s optimal lower bound. Finally, we iteratively optimise their policies via adversarial self-play interactive RL and design an ε-NE verification algorithm to approximate the game’s NE. Experiments on three datasets show that our model obtains state-of-the-art performance.
%R 10.18653/v1/2025.acl-long.184
%U https://aclanthology.org/2025.acl-long.184/
%U https://doi.org/10.18653/v1/2025.acl-long.184
%P 3665-3685
Markdown (Informal)
[Battling against Tough Resister: Strategy Planning with Adversarial Game for Non-collaborative Dialogues](https://aclanthology.org/2025.acl-long.184/) (Wang et al., ACL 2025)
ACL
Haiyang Wang, Zhiliang Tian, Yuchen Pan, Xin Song, Xin Niu, Minlie Huang, and Bin Zhou. 2025. Battling against Tough Resister: Strategy Planning with Adversarial Game for Non-collaborative Dialogues. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3665–3685, Vienna, Austria. Association for Computational Linguistics.
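The abstract describes an algorithmic core: a two-player zero-sum game between a persuader and a resister, optimised by adversarial self-play RL and checked against an ε-NE criterion. For readers unfamiliar with those terms, the sketch below is a purely illustrative toy in Python, not the paper's method or code: the 3×3 payoff matrix, the fictitious-play update, and the ε threshold are all hypothetical stand-ins, whereas GAIA trains LLM-based dialogue-strategy policies. The ε-NE test shown is the standard one: a strategy profile passes if neither player can improve its expected payoff by more than ε through a unilateral deviation.

```python
# Toy zero-sum self-play loop with an epsilon-NE check. Illustrative only:
# GAIA optimises LLM persuader/resister policies with RL, not a matrix game.

# Row player ("persuader") payoffs; the column player ("resister") receives
# the negation, which makes the game zero-sum.
PAYOFF = [
    [ 0.0,  1.0, -1.0],
    [-1.0,  0.0,  1.0],
    [ 1.0, -1.0,  0.0],
]

def expected_value(x, y, payoff):
    """Row player's expected payoff under mixed strategies x (rows), y (cols)."""
    return sum(payoff[i][j] * x[i] * y[j]
               for i in range(len(x)) for j in range(len(y)))

def best_deviation_value(opponent_mix, payoff, row_player):
    """Best expected payoff a player can secure by deviating unilaterally."""
    if row_player:
        return max(sum(payoff[i][j] * opponent_mix[j]
                       for j in range(len(opponent_mix)))
                   for i in range(len(payoff)))
    return max(sum(-payoff[i][j] * opponent_mix[i]
                   for i in range(len(opponent_mix)))
               for j in range(len(payoff[0])))

def is_epsilon_ne(x, y, payoff, eps):
    """(x, y) is an epsilon-NE if no unilateral deviation gains more than eps."""
    v = expected_value(x, y, payoff)
    row_gain = best_deviation_value(y, payoff, row_player=True) - v
    col_gain = best_deviation_value(x, payoff, row_player=False) - (-v)
    return max(row_gain, col_gain) <= eps

def self_play(payoff, rounds=20000):
    """Fictitious play: each side best-responds to the other's empirical mix."""
    n, m = len(payoff), len(payoff[0])
    row_counts, col_counts = [1] + [0] * (n - 1), [1] + [0] * (m - 1)
    for _ in range(rounds):
        x = [c / sum(row_counts) for c in row_counts]
        y = [c / sum(col_counts) for c in col_counts]
        br_row = max(range(n),
                     key=lambda i: sum(payoff[i][j] * y[j] for j in range(m)))
        br_col = max(range(m),
                     key=lambda j: sum(-payoff[i][j] * x[i] for i in range(n)))
        row_counts[br_row] += 1
        col_counts[br_col] += 1
    return ([c / sum(row_counts) for c in row_counts],
            [c / sum(col_counts) for c in col_counts])

x, y = self_play(PAYOFF)
print(x, y)                                   # both near uniform (1/3, 1/3, 1/3)
print(is_epsilon_ne(x, y, PAYOFF, eps=0.05))  # True once play has converged
```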