@inproceedings{dao-etal-2024-experience,
title = "Experience as Source for Anticipation and Planning: Experiential Policy Learning for Target-driven Recommendation Dialogues",
author = "Dao, Huy and
Deng, Yang and
Bui, Khanh-Huyen and
Le, Dung and
Liao, Lizi",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.829",
pages = "14179--14198",
abstract = "Target-driven recommendation dialogues present unique challenges in dialogue management due to the necessity of anticipating user interactions for successful conversations. Current methods face significant limitations: (I) inadequate capabilities for conversation anticipation, (II) computational inefficiencies due to costly simulations, and (III) neglect of valuable past dialogue experiences. To address these limitations, we propose a new framework, Experiential Policy Learning (EPL), for enhancing such dialogues. EPL embodies the principle of Learning From Experience, facilitating anticipation with an experiential scoring function that estimates dialogue state potential using similar past interactions stored in long-term memory. To demonstrate its flexibility, we introduce Tree-structured EPL (T-EPL) as one possible training-free realization with Large Language Models (LLMs) and Monte-Carlo Tree Search (MCTS). T-EPL assesses past dialogue states with LLMs while utilizing MCTS to achieve hierarchical and multi-level reasoning. Extensive experiments on two published datasets demonstrate the superiority and efficacy of T-EPL.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dao-etal-2024-experience">
<titleInfo>
<title>Experience as Source for Anticipation and Planning: Experiential Policy Learning for Target-driven Recommendation Dialogues</title>
</titleInfo>
<name type="personal">
<namePart type="given">Huy</namePart>
<namePart type="family">Dao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khanh-Huyen</namePart>
<namePart type="family">Bui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dung</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lizi</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Target-driven recommendation dialogues present unique challenges in dialogue management due to the necessity of anticipating user interactions for successful conversations. Current methods face significant limitations: (I) inadequate capabilities for conversation anticipation, (II) computational inefficiencies due to costly simulations, and (III) neglect of valuable past dialogue experiences. To address these limitations, we propose a new framework, Experiential Policy Learning (EPL), for enhancing such dialogues. EPL embodies the principle of Learning From Experience, facilitating anticipation with an experiential scoring function that estimates dialogue state potential using similar past interactions stored in long-term memory. To demonstrate its flexibility, we introduce Tree-structured EPL (T-EPL) as one possible training-free realization with Large Language Models (LLMs) and Monte-Carlo Tree Search (MCTS). T-EPL assesses past dialogue states with LLMs while utilizing MCTS to achieve hierarchical and multi-level reasoning. Extensive experiments on two published datasets demonstrate the superiority and efficacy of T-EPL.</abstract>
<identifier type="citekey">dao-etal-2024-experience</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.829</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14179</start>
<end>14198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Experience as Source for Anticipation and Planning: Experiential Policy Learning for Target-driven Recommendation Dialogues
%A Dao, Huy
%A Deng, Yang
%A Bui, Khanh-Huyen
%A Le, Dung
%A Liao, Lizi
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F dao-etal-2024-experience
%X Target-driven recommendation dialogues present unique challenges in dialogue management due to the necessity of anticipating user interactions for successful conversations. Current methods face significant limitations: (I) inadequate capabilities for conversation anticipation, (II) computational inefficiencies due to costly simulations, and (III) neglect of valuable past dialogue experiences. To address these limitations, we propose a new framework, Experiential Policy Learning (EPL), for enhancing such dialogues. EPL embodies the principle of Learning From Experience, facilitating anticipation with an experiential scoring function that estimates dialogue state potential using similar past interactions stored in long-term memory. To demonstrate its flexibility, we introduce Tree-structured EPL (T-EPL) as one possible training-free realization with Large Language Models (LLMs) and Monte-Carlo Tree Search (MCTS). T-EPL assesses past dialogue states with LLMs while utilizing MCTS to achieve hierarchical and multi-level reasoning. Extensive experiments on two published datasets demonstrate the superiority and efficacy of T-EPL.
%U https://aclanthology.org/2024.findings-emnlp.829
%P 14179-14198
Markdown (Informal)
[Experience as Source for Anticipation and Planning: Experiential Policy Learning for Target-driven Recommendation Dialogues](https://aclanthology.org/2024.findings-emnlp.829) (Dao et al., Findings 2024)
ACL