@inproceedings{yang-etal-2024-chamain,
title = "Chamain: Harmonizing Character Persona Integrity with Domain-Adaptive Knowledge in Dialogue Generation",
author = "Yang, Seung-Moo and
Lee, Jeehyun and
Cho, Won Ik",
editor = "Nouri, Elnaz and
Rastogi, Abhinav and
Spithourakis, Georgios and
Liu, Bing and
Chen, Yun-Nung and
Li, Yu and
Albalak, Alon and
Wakaki, Hiromi and
Papangelis, Alexandros",
booktitle = "Proceedings of the 6th Workshop on NLP for Conversational AI (NLP4ConvAI 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nlp4convai-1.7",
pages = "101--113",
abstract = "Recent advances in large language models (LLMs) have shown their capacity for generating natural dialogues, leveraging extensive pre-trained knowledge. However, the seamless integration of domain-specific knowledge into dialogue agents, without undermining their personas or unique textual style, remains a challenging task. Traditional approaches, such as constructing knowledge-aware character dialogue datasets or training LLMs from the ground up, require considerable resources. Sequentially fine-tuning character chatbots across multiple datasets or applying existing merging techniques often leads to catastrophic forgetting, resulting in the loss of both knowledge and the character{'}s distinct persona. This compromises the model{'}s ability to consistently generate character-driven dialogues within a user-centric framework. In this context, we introduce a novel model merging method, Chamain, which effortlessly enhances the performance of character models, much like finding a {``}free lunch{''}. Chamain merges domain-specific knowledge into a character model by parameter-wise weight combination of instruction-tuned models and learns to reflect persona{'}s unique characteristics and style through Layer-wise merging. Our experiments demonstrate that Chamain effectively maintains style while also solving domain-specific problems to a certain extent compared to the baselines, even showing a higher style probability compared to the character model in legal QA.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-etal-2024-chamain">
<titleInfo>
<title>Chamain: Harmonizing Character Persona Integrity with Domain-Adaptive Knowledge in Dialogue Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seung-Moo</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeehyun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Won</namePart>
<namePart type="given">Ik</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on NLP for Conversational AI (NLP4ConvAI 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elnaz</namePart>
<namePart type="family">Nouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhinav</namePart>
<namePart type="family">Rastogi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georgios</namePart>
<namePart type="family">Spithourakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Albalak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiromi</namePart>
<namePart type="family">Wakaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandros</namePart>
<namePart type="family">Papangelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent advances in large language models (LLMs) have shown their capacity for generating natural dialogues, leveraging extensive pre-trained knowledge. However, the seamless integration of domain-specific knowledge into dialogue agents, without undermining their personas or unique textual style, remains a challenging task. Traditional approaches, such as constructing knowledge-aware character dialogue datasets or training LLMs from the ground up, require considerable resources. Sequentially fine-tuning character chatbots across multiple datasets or applying existing merging techniques often leads to catastrophic forgetting, resulting in the loss of both knowledge and the character’s distinct persona. This compromises the model’s ability to consistently generate character-driven dialogues within a user-centric framework. In this context, we introduce a novel model merging method, Chamain, which effortlessly enhances the performance of character models, much like finding a “free lunch”. Chamain merges domain-specific knowledge into a character model by parameter-wise weight combination of instruction-tuned models and learns to reflect persona’s unique characteristics and style through Layer-wise merging. Our experiments demonstrate that Chamain effectively maintains style while also solving domain-specific problems to a certain extent compared to the baselines, even showing a higher style probability compared to the character model in legal QA.</abstract>
<identifier type="citekey">yang-etal-2024-chamain</identifier>
<location>
<url>https://aclanthology.org/2024.nlp4convai-1.7</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>101</start>
<end>113</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chamain: Harmonizing Character Persona Integrity with Domain-Adaptive Knowledge in Dialogue Generation
%A Yang, Seung-Moo
%A Lee, Jeehyun
%A Cho, Won Ik
%Y Nouri, Elnaz
%Y Rastogi, Abhinav
%Y Spithourakis, Georgios
%Y Liu, Bing
%Y Chen, Yun-Nung
%Y Li, Yu
%Y Albalak, Alon
%Y Wakaki, Hiromi
%Y Papangelis, Alexandros
%S Proceedings of the 6th Workshop on NLP for Conversational AI (NLP4ConvAI 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F yang-etal-2024-chamain
%X Recent advances in large language models (LLMs) have shown their capacity for generating natural dialogues, leveraging extensive pre-trained knowledge. However, the seamless integration of domain-specific knowledge into dialogue agents, without undermining their personas or unique textual style, remains a challenging task. Traditional approaches, such as constructing knowledge-aware character dialogue datasets or training LLMs from the ground up, require considerable resources. Sequentially fine-tuning character chatbots across multiple datasets or applying existing merging techniques often leads to catastrophic forgetting, resulting in the loss of both knowledge and the character’s distinct persona. This compromises the model’s ability to consistently generate character-driven dialogues within a user-centric framework. In this context, we introduce a novel model merging method, Chamain, which effortlessly enhances the performance of character models, much like finding a “free lunch”. Chamain merges domain-specific knowledge into a character model by parameter-wise weight combination of instruction-tuned models and learns to reflect persona’s unique characteristics and style through Layer-wise merging. Our experiments demonstrate that Chamain effectively maintains style while also solving domain-specific problems to a certain extent compared to the baselines, even showing a higher style probability compared to the character model in legal QA.
%U https://aclanthology.org/2024.nlp4convai-1.7
%P 101-113
Markdown (Informal)
[Chamain: Harmonizing Character Persona Integrity with Domain-Adaptive Knowledge in Dialogue Generation](https://aclanthology.org/2024.nlp4convai-1.7) (Yang et al., NLP4ConvAI-WS 2024)
ACL