@inproceedings{takayama-etal-2025-persona,
title = "Persona-Consistent Dialogue Generation via Pseudo Preference Tuning",
author = "Takayama, Junya and
Ohagi, Masaya and
Mizumoto, Tomoya and
Yoshikawa, Katsumasa",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.369/",
pages = "5507--5514",
abstract = "We propose a simple yet effective method for enhancing persona consistency in dialogue response generation using Direct Preference Optimization (DPO). In our method, we generate responses from the response generation model using persona information that has been randomly swapped with data from other dialogues, treating these responses as pseudo-negative samples. The reference responses serve as positive samples, allowing us to create pseudo-preference data. Experimental results demonstrate that our model, fine-tuned with DPO on the pseudo preference data, produces more consistent and natural responses compared to models trained using supervised fine-tuning or reinforcement learning approaches based on entailment relations between personas and utterances."
}
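The abstract above describes the data construction only at a high level; the sketch below illustrates one way the pseudo-preference pairs could be assembled, under stated assumptions. Personas are shuffled across dialogues, the response model generates a reply conditioned on the mismatched persona (the pseudo-negative, or "rejected", sample), and the gold reference response is kept as the "chosen" sample. The base model name, the build_prompt format, the dialogue fields ("persona", "history", "reference"), and the decoding settings are illustrative assumptions, not the authors' implementation.

# Illustrative sketch (not the authors' code): build pseudo-preference pairs by
# swapping personas across dialogues and letting the response model generate
# pseudo-negative ("rejected") replies; gold references become the "chosen" replies.
import random
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "gpt2"  # placeholder base model; the paper's model may differ
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

def build_prompt(persona_sentences, history_turns):
    # Assumed prompt format: persona sentences, then dialogue history, then a cue.
    return ("persona: " + " ".join(persona_sentences)
            + "\nhistory: " + " ".join(history_turns)
            + "\nresponse:")

def make_pseudo_preference_data(dialogues, seed=0):
    # dialogues: list of dicts with "persona", "history", and "reference" fields.
    rng = random.Random(seed)
    swapped = [d["persona"] for d in dialogues]
    rng.shuffle(swapped)  # each dialogue now gets a persona from another dialogue

    pairs = []
    for dialogue, wrong_persona in zip(dialogues, swapped):
        prompt = build_prompt(dialogue["persona"], dialogue["history"])
        # Pseudo-negative: generate a reply conditioned on the mismatched persona.
        neg_inputs = tokenizer(build_prompt(wrong_persona, dialogue["history"]),
                               return_tensors="pt")
        neg_ids = model.generate(**neg_inputs, max_new_tokens=64, do_sample=True)
        rejected = tokenizer.decode(neg_ids[0][neg_inputs["input_ids"].shape[1]:],
                                    skip_special_tokens=True)
        pairs.append({"prompt": prompt,
                      "chosen": dialogue["reference"],  # gold response = positive
                      "rejected": rejected})            # swapped-persona reply = negative
    return pairs  # these triples can then be passed to a DPO trainer (e.g., trl.DPOTrainer)

Hyperparameters, prompt formatting, and decoding strategy would need to follow the original setup described in the paper; the snippet only mirrors the idea stated in the abstract.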
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="takayama-etal-2025-persona">
    <titleInfo>
      <title>Persona-Consistent Dialogue Generation via Pseudo Preference Tuning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Junya</namePart>
      <namePart type="family">Takayama</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Masaya</namePart>
      <namePart type="family">Ohagi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomoya</namePart>
      <namePart type="family">Mizumoto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Katsumasa</namePart>
      <namePart type="family">Yoshikawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Barbara</namePart>
        <namePart type="given">Di</namePart>
        <namePart type="family">Eugenio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Schockaert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a simple yet effective method for enhancing persona consistency in dialogue response generation using Direct Preference Optimization (DPO). In our method, we generate responses from the response generation model using persona information that has been randomly swapped with data from other dialogues, treating these responses as pseudo-negative samples. The reference responses serve as positive samples, allowing us to create pseudo-preference data. Experimental results demonstrate that our model, fine-tuned with DPO on the pseudo preference data, produces more consistent and natural responses compared to models trained using supervised fine-tuning or reinforcement learning approaches based on entailment relations between personas and utterances.</abstract>
    <identifier type="citekey">takayama-etal-2025-persona</identifier>
    <location>
      <url>https://aclanthology.org/2025.coling-main.369/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>5507</start>
        <end>5514</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Persona-Consistent Dialogue Generation via Pseudo Preference Tuning
%A Takayama, Junya
%A Ohagi, Masaya
%A Mizumoto, Tomoya
%A Yoshikawa, Katsumasa
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F takayama-etal-2025-persona
%X We propose a simple yet effective method for enhancing persona consistency in dialogue response generation using Direct Preference Optimization (DPO). In our method, we generate responses from the response generation model using persona information that has been randomly swapped with data from other dialogues, treating these responses as pseudo-negative samples. The reference responses serve as positive samples, allowing us to create pseudo-preference data. Experimental results demonstrate that our model, fine-tuned with DPO on the pseudo preference data, produces more consistent and natural responses compared to models trained using supervised fine-tuning or reinforcement learning approaches based on entailment relations between personas and utterances.
%U https://aclanthology.org/2025.coling-main.369/
%P 5507-5514
Markdown (Informal)
[Persona-Consistent Dialogue Generation via Pseudo Preference Tuning](https://aclanthology.org/2025.coling-main.369/) (Takayama et al., COLING 2025)
ACL
Junya Takayama, Masaya Ohagi, Tomoya Mizumoto, and Katsumasa Yoshikawa. 2025. Persona-Consistent Dialogue Generation via Pseudo Preference Tuning. In Proceedings of the 31st International Conference on Computational Linguistics, pages 5507–5514, Abu Dhabi, UAE. Association for Computational Linguistics.