@inproceedings{huang-etal-2022-unsupervised,
    title = "Unsupervised Text Summarization of Long Documents using Dependency-based Noun Phrases and Contextual Order Arrangement",
    author = "Huang, Yen-Hao and
      Lan, Hsiao-Yen and
      Chen, Yi-Shin",
    editor = "Chang, Yung-Chun and
      Huang, Yi-Chin",
    booktitle = "Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)",
    month = nov,
    year = "2022",
    address = "Taipei, Taiwan",
    publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
    url = "https://aclanthology.org/2022.rocling-1.3/",
    pages = "15--24",
    abstract = "Unsupervised extractive summarization has recently gained importance since it does not require labeled data. Among unsupervised methods, graph-based approaches have achieved outstanding results. These methods represent each document by a graph, with sentences as nodes and word-level similarity among sentences as edges. Common words can easily lead to a strong connection between sentence nodes. Thus, sentences with many common words can be misinterpreted as salient sentences for a summary. This work addresses the common word issue with a phrase-level graph that (1) focuses on the noun phrases of a document based on grammar dependencies and (2) initializes edge weights by term-frequency within the target document and inverse document frequency over the entire corpus. The importance scores of noun phrases extracted from the graph are then used to select the most salient sentences. To preserve summary coherence, the order of the selected sentences is re-arranged by a flow-aware orderBERT. The results reveal that our unsupervised framework outperformed other extractive methods on ROUGE as well as two human evaluations for semantic similarity and summary coherence."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="huang-etal-2022-unsupervised">
    <titleInfo>
      <title>Unsupervised Text Summarization of Long Documents using Dependency-based Noun Phrases and Contextual Order Arrangement</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yen-Hao</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hsiao-Yen</namePart>
      <namePart type="family">Lan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yi-Shin</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yung-Chun</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yi-Chin</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
        <place>
          <placeTerm type="text">Taipei, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Unsupervised extractive summarization has recently gained importance since it does not require labeled data. Among unsupervised methods, graph-based approaches have achieved outstanding results. These methods represent each document by a graph, with sentences as nodes and word-level similarity among sentences as edges. Common words can easily lead to a strong connection between sentence nodes. Thus, sentences with many common words can be misinterpreted as salient sentences for a summary. This work addresses the common word issue with a phrase-level graph that (1) focuses on the noun phrases of a document based on grammar dependencies and (2) initializes edge weights by term-frequency within the target document and inverse document frequency over the entire corpus. The importance scores of noun phrases extracted from the graph are then used to select the most salient sentences. To preserve summary coherence, the order of the selected sentences is re-arranged by a flow-aware orderBERT. The results reveal that our unsupervised framework outperformed other extractive methods on ROUGE as well as two human evaluations for semantic similarity and summary coherence.</abstract>
    <identifier type="citekey">huang-etal-2022-unsupervised</identifier>
    <location>
      <url>https://aclanthology.org/2022.rocling-1.3/</url>
    </location>
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>15</start>
        <end>24</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Text Summarization of Long Documents using Dependency-based Noun Phrases and Contextual Order Arrangement
%A Huang, Yen-Hao
%A Lan, Hsiao-Yen
%A Chen, Yi-Shin
%Y Chang, Yung-Chun
%Y Huang, Yi-Chin
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%F huang-etal-2022-unsupervised
%X Unsupervised extractive summarization has recently gained importance since it does not require labeled data. Among unsupervised methods, graph-based approaches have achieved outstanding results. These methods represent each document by a graph, with sentences as nodes and word-level similarity among sentences as edges. Common words can easily lead to a strong connection between sentence nodes. Thus, sentences with many common words can be misinterpreted as salient sentences for a summary. This work addresses the common word issue with a phrase-level graph that (1) focuses on the noun phrases of a document based on grammar dependencies and (2) initializes edge weights by term-frequency within the target document and inverse document frequency over the entire corpus. The importance scores of noun phrases extracted from the graph are then used to select the most salient sentences. To preserve summary coherence, the order of the selected sentences is re-arranged by a flow-aware orderBERT. The results reveal that our unsupervised framework outperformed other extractive methods on ROUGE as well as two human evaluations for semantic similarity and summary coherence.
%U https://aclanthology.org/2022.rocling-1.3/
%P 15-24
Markdown (Informal)
[Unsupervised Text Summarization of Long Documents using Dependency-based Noun Phrases and Contextual Order Arrangement](https://aclanthology.org/2022.rocling-1.3/) (Huang et al., ROCLING 2022)
ACL
Yen-Hao Huang, Hsiao-Yen Lan, and Yi-Shin Chen. 2022. Unsupervised Text Summarization of Long Documents using Dependency-based Noun Phrases and Contextual Order Arrangement. In Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022), pages 15–24, Taipei, Taiwan. The Association for Computational Linguistics and Chinese Language Processing (ACLCLP).