@inproceedings{wu-etal-2022-unsupervised,
    title = "Unsupervised Single Document Abstractive Summarization using Semantic Units",
    author = "Wu, Jhen-Yi and
      Lin, Ying-Jia and
      Kao, Hung-Yu",
    editor = "He, Yulan and
      Ji, Heng and
      Li, Sujian and
      Liu, Yang and
      Chang, Chia-Hui",
    booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
    month = nov,
    year = "2022",
    address = "Online only",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.aacl-main.69",
    doi = "10.18653/v1/2022.aacl-main.69",
    pages = "954--966",
    abstract = "In this work, we study the importance of content frequency on abstractive summarization, where we define the content as {``}semantic units.{''} We propose a two-stage training framework to let the model automatically learn the frequency of each semantic unit in the source text. Our model is trained in an unsupervised manner since the frequency information can be inferred from source text only. During inference, our model identifies sentences with high-frequency semantic units and utilizes frequency information to generate summaries from the filtered sentences. Our model performance on the CNN/Daily Mail summarization task outperforms the other unsupervised methods under the same settings. Furthermore, we achieve competitive ROUGE scores with far fewer model parameters compared to several large-scale pre-trained models. Our model can be trained under low-resource language settings and thus can serve as a potential solution for real-world applications where pre-trained models are not applicable.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wu-etal-2022-unsupervised">
    <titleInfo>
      <title>Unsupervised Single Document Abstractive Summarization using Semantic Units</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Jhen-Yi</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ying-Jia</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hung-Yu</namePart>
      <namePart type="family">Kao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Heng</namePart>
        <namePart type="family">Ji</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sujian</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yang</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chia-Hui</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online only</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this work, we study the importance of content frequency on abstractive summarization, where we define the content as “semantic units.” We propose a two-stage training framework to let the model automatically learn the frequency of each semantic unit in the source text. Our model is trained in an unsupervised manner since the frequency information can be inferred from source text only. During inference, our model identifies sentences with high-frequency semantic units and utilizes frequency information to generate summaries from the filtered sentences. Our model performance on the CNN/Daily Mail summarization task outperforms the other unsupervised methods under the same settings. Furthermore, we achieve competitive ROUGE scores with far fewer model parameters compared to several large-scale pre-trained models. Our model can be trained under low-resource language settings and thus can serve as a potential solution for real-world applications where pre-trained models are not applicable.</abstract>
    <identifier type="citekey">wu-etal-2022-unsupervised</identifier>
    <identifier type="doi">10.18653/v1/2022.aacl-main.69</identifier>
    <location>
      <url>https://aclanthology.org/2022.aacl-main.69</url>
    </location>
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>954</start>
        <end>966</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Single Document Abstractive Summarization using Semantic Units
%A Wu, Jhen-Yi
%A Lin, Ying-Jia
%A Kao, Hung-Yu
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chia-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F wu-etal-2022-unsupervised
%X In this work, we study the importance of content frequency on abstractive summarization, where we define the content as “semantic units.” We propose a two-stage training framework to let the model automatically learn the frequency of each semantic unit in the source text. Our model is trained in an unsupervised manner since the frequency information can be inferred from source text only. During inference, our model identifies sentences with high-frequency semantic units and utilizes frequency information to generate summaries from the filtered sentences. Our model performance on the CNN/Daily Mail summarization task outperforms the other unsupervised methods under the same settings. Furthermore, we achieve competitive ROUGE scores with far fewer model parameters compared to several large-scale pre-trained models. Our model can be trained under low-resource language settings and thus can serve as a potential solution for real-world applications where pre-trained models are not applicable.
%R 10.18653/v1/2022.aacl-main.69
%U https://aclanthology.org/2022.aacl-main.69
%U https://doi.org/10.18653/v1/2022.aacl-main.69
%P 954-966
Markdown (Informal)
[Unsupervised Single Document Abstractive Summarization using Semantic Units](https://aclanthology.org/2022.aacl-main.69) (Wu et al., AACL-IJCNLP 2022)
ACL
Jhen-Yi Wu, Ying-Jia Lin, and Hung-Yu Kao. 2022. Unsupervised Single Document Abstractive Summarization using Semantic Units. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 954–966, Online only. Association for Computational Linguistics.
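
The abstract above describes an inference-time heuristic: keep sentences whose "semantic units" occur frequently across the source document, then generate the summary from the filtered set. As a rough illustration of that filtering idea only, here is a minimal Python sketch; it is not the authors' implementation. In the paper the semantic units and their frequencies are learned by the model itself, whereas this sketch crudely approximates a unit as a non-stopword token, and the names `semantic_units` and `filter_sentences`, the stopword list, and the averaging score are all invented here for illustration.

```python
import re
from collections import Counter

# Illustrative stand-ins only: the paper learns semantic units and their
# frequencies; here a "semantic unit" is approximated as a lowercased
# non-stopword token so the filtering idea can be shown end to end.
STOPWORDS = {"the", "a", "an", "and", "or", "of", "to", "in", "on", "for",
             "is", "are", "was", "were", "with", "that", "this", "it", "as",
             "by", "from", "then"}

def semantic_units(sentence: str) -> list[str]:
    """Crude surface-level proxy for the paper's learned semantic units."""
    tokens = re.findall(r"[a-z']+", sentence.lower())
    return [t for t in tokens if t not in STOPWORDS]

def filter_sentences(document: str, keep: int = 3) -> list[str]:
    """Score each sentence by the document-level frequency of its units
    and return the `keep` highest-scoring sentences in source order."""
    sentences = re.split(r"(?<=[.!?])\s+", document.strip())
    freq = Counter(u for s in sentences for u in semantic_units(s))

    def score(sentence: str) -> float:
        units = semantic_units(sentence)
        return sum(freq[u] for u in units) / len(units) if units else 0.0

    top = set(sorted(sentences, key=score, reverse=True)[:keep])
    return [s for s in sentences if s in top]

if __name__ == "__main__":
    doc = ("The model filters sentences by unit frequency. "
           "Frequent units signal salient content in the source. "
           "A summary is then generated from the filtered sentences. "
           "Unrelated trivia rarely repeats its units.")
    for s in filter_sentences(doc, keep=2):
        print(s)
```

In the actual system an abstractive generator would then summarize the retained sentences; this sketch stops at the frequency-based filtering step that the abstract highlights.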