@inproceedings{duan-etal-2024-prophecy-distillation,
title = "Prophecy Distillation for Boosting Abstractive Summarization",
author = "Duan, Jiaxin and
Lu, Fengyu and
Liu, Junfei",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1160",
pages = "13257--13268",
abstract = "Abstractive summarization models learned with maximum likelihood estimation (MLE) have long been guilty of generating unfaithful facts alongside ambiguous focus. Improved paradigm under the guidance of reference-identified words, i.e., guided summarization, has exhibited remarkable advantages in overcoming this problem. However, it suffers limited real applications since the prophetic guidance is practically agnostic at inference. In this paper, we introduce a novel teacher-student framework, which learns a regular summarization model to mimic the behavior of being guided by prophecy for boosting abstractive summaries. Specifically, by training in probability spaces to follow and distinguish a guided teacher model, a student model learns the key to generating teacher-like quality summaries without any guidance. We refer to this process as prophecy distillation, and it breaks the limitations of both standard and guided summarization. Through extensive experiments, we show that our method achieves new or matched state-of-the-art on four well-known datasets, including ROUGE scores, faithfulness, and saliency awareness. Human evaluations are also carried out to evidence these merits. Furthermore, we conduct empirical studies to analyze how the hyperparameters setting and the guidance choice affect TPG performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="duan-etal-2024-prophecy-distillation">
<titleInfo>
<title>Prophecy Distillation for Boosting Abstractive Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fengyu</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Abstractive summarization models learned with maximum likelihood estimation (MLE) have long been guilty of generating unfaithful facts alongside ambiguous focus. Improved paradigm under the guidance of reference-identified words, i.e., guided summarization, has exhibited remarkable advantages in overcoming this problem. However, it suffers limited real applications since the prophetic guidance is practically agnostic at inference. In this paper, we introduce a novel teacher-student framework, which learns a regular summarization model to mimic the behavior of being guided by prophecy for boosting abstractive summaries. Specifically, by training in probability spaces to follow and distinguish a guided teacher model, a student model learns the key to generating teacher-like quality summaries without any guidance. We refer to this process as prophecy distillation, and it breaks the limitations of both standard and guided summarization. Through extensive experiments, we show that our method achieves new or matched state-of-the-art on four well-known datasets, including ROUGE scores, faithfulness, and saliency awareness. Human evaluations are also carried out to evidence these merits. Furthermore, we conduct empirical studies to analyze how the hyperparameters setting and the guidance choice affect TPG performance.</abstract>
<identifier type="citekey">duan-etal-2024-prophecy-distillation</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1160</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>13257</start>
<end>13268</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Prophecy Distillation for Boosting Abstractive Summarization
%A Duan, Jiaxin
%A Lu, Fengyu
%A Liu, Junfei
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F duan-etal-2024-prophecy-distillation
%X Abstractive summarization models learned with maximum likelihood estimation (MLE) have long been guilty of generating unfaithful facts alongside ambiguous focus. Improved paradigm under the guidance of reference-identified words, i.e., guided summarization, has exhibited remarkable advantages in overcoming this problem. However, it suffers limited real applications since the prophetic guidance is practically agnostic at inference. In this paper, we introduce a novel teacher-student framework, which learns a regular summarization model to mimic the behavior of being guided by prophecy for boosting abstractive summaries. Specifically, by training in probability spaces to follow and distinguish a guided teacher model, a student model learns the key to generating teacher-like quality summaries without any guidance. We refer to this process as prophecy distillation, and it breaks the limitations of both standard and guided summarization. Through extensive experiments, we show that our method achieves new or matched state-of-the-art on four well-known datasets, including ROUGE scores, faithfulness, and saliency awareness. Human evaluations are also carried out to evidence these merits. Furthermore, we conduct empirical studies to analyze how the hyperparameters setting and the guidance choice affect TPG performance.
%U https://aclanthology.org/2024.lrec-main.1160
%P 13257-13268
Markdown (Informal)
[Prophecy Distillation for Boosting Abstractive Summarization](https://aclanthology.org/2024.lrec-main.1160) (Duan et al., LREC-COLING 2024)
ACL