BibTeX
@inproceedings{manakul-gales-2021-long,
    title = "Long-Span Summarization via Local Attention and Content Selection",
    author = "Manakul, Potsawee and
      Gales, Mark",
    editor = "Zong, Chengqing and
      Xia, Fei and
      Li, Wenjie and
      Navigli, Roberto",
    booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)",
    month = aug,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.acl-long.470",
    doi = "10.18653/v1/2021.acl-long.470",
    pages = "6026--6041",
    abstract = "Transformer-based models have achieved state-of-the-art results in a wide range of natural language processing (NLP) tasks including document summarization. Typically these systems are trained by fine-tuning a large pre-trained model to the target task. One issue with these transformer-based models is that they do not scale well in terms of memory and compute requirements as the input length grows. Thus, for long document summarization, it can be challenging to train or fine-tune these models. In this work, we exploit large pre-trained transformer-based models and address long-span dependencies in abstractive summarization using two methods: local self-attention; and explicit content selection. These approaches are compared on a range of network configurations. Experiments are carried out on standard long-span summarization tasks, including Spotify Podcast, arXiv, and PubMed datasets. We demonstrate that by combining these methods, we can achieve state-of-the-art results on all three tasks in the ROUGE scores. Moreover, without a large-scale GPU card, our approach can achieve comparable or better results than existing approaches.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="manakul-gales-2021-long">
    <titleInfo>
      <title>Long-Span Summarization via Local Attention and Content Selection</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Potsawee</namePart>
      <namePart type="family">Manakul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mark</namePart>
      <namePart type="family">Gales</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Chengqing</namePart>
        <namePart type="family">Zong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fei</namePart>
        <namePart type="family">Xia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wenjie</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roberto</namePart>
        <namePart type="family">Navigli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Transformer-based models have achieved state-of-the-art results in a wide range of natural language processing (NLP) tasks including document summarization. Typically these systems are trained by fine-tuning a large pre-trained model to the target task. One issue with these transformer-based models is that they do not scale well in terms of memory and compute requirements as the input length grows. Thus, for long document summarization, it can be challenging to train or fine-tune these models. In this work, we exploit large pre-trained transformer-based models and address long-span dependencies in abstractive summarization using two methods: local self-attention; and explicit content selection. These approaches are compared on a range of network configurations. Experiments are carried out on standard long-span summarization tasks, including Spotify Podcast, arXiv, and PubMed datasets. We demonstrate that by combining these methods, we can achieve state-of-the-art results on all three tasks in the ROUGE scores. Moreover, without a large-scale GPU card, our approach can achieve comparable or better results than existing approaches.</abstract>
    <identifier type="citekey">manakul-gales-2021-long</identifier>
    <identifier type="doi">10.18653/v1/2021.acl-long.470</identifier>
    <location>
      <url>https://aclanthology.org/2021.acl-long.470</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>6026</start>
        <end>6041</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Long-Span Summarization via Local Attention and Content Selection
%A Manakul, Potsawee
%A Gales, Mark
%Y Zong, Chengqing
%Y Xia, Fei
%Y Li, Wenjie
%Y Navigli, Roberto
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F manakul-gales-2021-long
%X Transformer-based models have achieved state-of-the-art results in a wide range of natural language processing (NLP) tasks including document summarization. Typically these systems are trained by fine-tuning a large pre-trained model to the target task. One issue with these transformer-based models is that they do not scale well in terms of memory and compute requirements as the input length grows. Thus, for long document summarization, it can be challenging to train or fine-tune these models. In this work, we exploit large pre-trained transformer-based models and address long-span dependencies in abstractive summarization using two methods: local self-attention; and explicit content selection. These approaches are compared on a range of network configurations. Experiments are carried out on standard long-span summarization tasks, including Spotify Podcast, arXiv, and PubMed datasets. We demonstrate that by combining these methods, we can achieve state-of-the-art results on all three tasks in the ROUGE scores. Moreover, without a large-scale GPU card, our approach can achieve comparable or better results than existing approaches.
%R 10.18653/v1/2021.acl-long.470
%U https://aclanthology.org/2021.acl-long.470
%U https://doi.org/10.18653/v1/2021.acl-long.470
%P 6026-6041
Markdown (Informal)
[Long-Span Summarization via Local Attention and Content Selection](https://aclanthology.org/2021.acl-long.470) (Manakul & Gales, ACL-IJCNLP 2021)
ACL
Potsawee Manakul and Mark Gales. 2021. Long-Span Summarization via Local Attention and Content Selection. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pages 6026–6041, Online. Association for Computational Linguistics.
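
Note: the abstract names local self-attention as one of the paper's two methods for handling long inputs. The snippet below is an informal NumPy sketch of the general windowed-attention idea only, not the authors' implementation; the function name, window size, and tensor shapes are illustrative assumptions, the content-selection step is not shown, and this naive version still materialises the full attention matrix rather than achieving the memory savings of a real local-attention kernel.

```python
# Illustrative sketch of windowed ("local") self-attention; not the paper's code.
import numpy as np

def local_self_attention(q, k, v, window):
    """Each query position attends only to key positions within +/- `window`."""
    seq_len, d = q.shape
    scores = q @ k.T / np.sqrt(d)                      # (seq_len, seq_len) similarity logits

    # Band mask: position i may attend to j only if |i - j| <= window.
    idx = np.arange(seq_len)
    band = np.abs(idx[:, None] - idx[None, :]) <= window
    scores = np.where(band, scores, -np.inf)           # forbid out-of-window attention

    # Numerically stable row-wise softmax over the allowed positions.
    scores -= scores.max(axis=-1, keepdims=True)
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)
    return weights @ v                                  # (seq_len, d) context vectors

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.standard_normal((16, 8))                    # toy sequence: 16 positions, dim 8
    print(local_self_attention(x, x, x, window=2).shape)  # -> (16, 8)
```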