@inproceedings{zhu-etal-2024-ra2fd,
title = "{RA}2{FD}: Distilling Faithfulness into Efficient Dialogue Systems",
author = "Zhu, Zhiyuan and
Liao, Yusheng and
Xu, Chenxin and
Guan, Yunfeng and
Wang, Yanfeng and
Wang, Yu",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.685",
doi = "10.18653/v1/2024.emnlp-main.685",
pages = "12304--12317",
abstract = "Generating faithful and fast responses is crucial in the knowledge-grounded dialogue. Retrieval Augmented Generation (RAG) strategies are effective but are inference inefficient, while previous Retrieval Free Generations (RFG) are more efficient but sacrifice faithfulness. To solve this faithfulness-efficiency trade-off dilemma, we propose a novel retrieval-free model training scheme named Retrieval Augmented to Retrieval Free Distillation (RA2FD) to build a retrieval-free model that achieves higher faithfulness than the previous RFG method while maintaining inference efficiency. The core idea of RA2FD is to use a teacher-student framework to distill the faithfulness capacity of a teacher, which is an oracle RAG model that generates multiple knowledge-infused responses. The student retrieval-free model learns how to generate faithful responses from these teacher labels through sequence-level distillation and contrastive learning. Experiment results show that RA2FD let the faithfulness performance of an RFG model surpass the previous SOTA RFG baseline on three knowledge-grounded dialogue datasets by an average of 33{\%} and even matching an RAG model{'}s performance while significantly improving inference efficiency. Our code is available at https://github.com/zzysjtuiwct/RA2FD.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2024-ra2fd">
<titleInfo>
<title>RA2FD: Distilling Faithfulness into Efficient Dialogue Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhiyuan</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusheng</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenxin</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunfeng</namePart>
<namePart type="family">Guan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanfeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating faithful and fast responses is crucial in the knowledge-grounded dialogue. Retrieval Augmented Generation (RAG) strategies are effective but are inference inefficient, while previous Retrieval Free Generations (RFG) are more efficient but sacrifice faithfulness. To solve this faithfulness-efficiency trade-off dilemma, we propose a novel retrieval-free model training scheme named Retrieval Augmented to Retrieval Free Distillation (RA2FD) to build a retrieval-free model that achieves higher faithfulness than the previous RFG method while maintaining inference efficiency. The core idea of RA2FD is to use a teacher-student framework to distill the faithfulness capacity of a teacher, which is an oracle RAG model that generates multiple knowledge-infused responses. The student retrieval-free model learns how to generate faithful responses from these teacher labels through sequence-level distillation and contrastive learning. Experiment results show that RA2FD let the faithfulness performance of an RFG model surpass the previous SOTA RFG baseline on three knowledge-grounded dialogue datasets by an average of 33% and even matching an RAG model’s performance while significantly improving inference efficiency. Our code is available at https://github.com/zzysjtuiwct/RA2FD.</abstract>
<identifier type="citekey">zhu-etal-2024-ra2fd</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.685</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.685</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>12304</start>
<end>12317</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RA2FD: Distilling Faithfulness into Efficient Dialogue Systems
%A Zhu, Zhiyuan
%A Liao, Yusheng
%A Xu, Chenxin
%A Guan, Yunfeng
%A Wang, Yanfeng
%A Wang, Yu
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhu-etal-2024-ra2fd
%X Generating faithful and fast responses is crucial in the knowledge-grounded dialogue. Retrieval Augmented Generation (RAG) strategies are effective but are inference inefficient, while previous Retrieval Free Generations (RFG) are more efficient but sacrifice faithfulness. To solve this faithfulness-efficiency trade-off dilemma, we propose a novel retrieval-free model training scheme named Retrieval Augmented to Retrieval Free Distillation (RA2FD) to build a retrieval-free model that achieves higher faithfulness than the previous RFG method while maintaining inference efficiency. The core idea of RA2FD is to use a teacher-student framework to distill the faithfulness capacity of a teacher, which is an oracle RAG model that generates multiple knowledge-infused responses. The student retrieval-free model learns how to generate faithful responses from these teacher labels through sequence-level distillation and contrastive learning. Experiment results show that RA2FD let the faithfulness performance of an RFG model surpass the previous SOTA RFG baseline on three knowledge-grounded dialogue datasets by an average of 33% and even matching an RAG model’s performance while significantly improving inference efficiency. Our code is available at https://github.com/zzysjtuiwct/RA2FD.
%R 10.18653/v1/2024.emnlp-main.685
%U https://aclanthology.org/2024.emnlp-main.685
%U https://doi.org/10.18653/v1/2024.emnlp-main.685
%P 12304-12317
Markdown (Informal)
[RA2FD: Distilling Faithfulness into Efficient Dialogue Systems](https://aclanthology.org/2024.emnlp-main.685) (Zhu et al., EMNLP 2024)
ACL
- Zhiyuan Zhu, Yusheng Liao, Chenxin Xu, Yunfeng Guan, Yanfeng Wang, and Yu Wang. 2024. RA2FD: Distilling Faithfulness into Efficient Dialogue Systems. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 12304–12317, Miami, Florida, USA. Association for Computational Linguistics.