@inproceedings{rahman-etal-2024-cuet-sstm,
title = "{CUET}{\_}{SSTM} at the {GEM}{'}24 Summarization Task: Integration of extractive and abstractive method for long text summarization in {S}wahili language",
author = "Rahman, Samia and
Labib, Momtazul Arefin and
Murad, Hasan and
Das, Udoy",
editor = "Mille, Simon and
Clinciu, Miruna-Adriana",
booktitle = "Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges",
month = sep,
year = "2024",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.inlg-genchal.12",
pages = "112--117",
abstract = "Swahili, spoken by around 200 million people primarily in Tanzania and Kenya, has been the focus of our research for the GEM Shared Task at INLG{'}24 on Underrepresented Language Summarization. We have utilized the XLSUM dataset and have manually summarized 1000 texts from a Swahili news classification dataset. To achieve the desired results, we have tested abstractive summarizers (mT5{\_}multilingual{\_}XLSum, t5-small, mBART-50), and an extractive summarizer (based on PageRank algorithm). But our adopted model consists of an integrated extractive-abstractive model combining the Bert Extractive Summarizer with some abstractive summarizers (t5-small, mBART-50). The integrated model overcome the drawbacks of both the extractive and abstractive summarization system and utilizes the benefit from both of it. Extractive summarizer shorten the paragraphs exceeding 512 tokens, ensuring no important information has been lost before applying the abstractive models. The abstractive summarizer use its pretrained knowledge and ensure to generate context based summary.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rahman-etal-2024-cuet-sstm">
<titleInfo>
<title>CUET_SSTM at the GEM’24 Summarization Task: Integration of extractive and abstractive method for long text summarization in Swahili language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Momtazul</namePart>
<namePart type="given">Arefin</namePart>
<namePart type="family">Labib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hasan</namePart>
<namePart type="family">Murad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udoy</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miruna-Adriana</namePart>
<namePart type="family">Clinciu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>Swahili, spoken by around 200 million people primarily in Tanzania and Kenya, has been the focus of our research for the GEM Shared Task at INLG’24 on Underrepresented Language Summarization. We have utilized the XLSUM dataset and manually summarized 1000 texts from a Swahili news classification dataset. To achieve the desired results, we have tested abstractive summarizers (mT5_multilingual_XLSum, t5-small, mBART-50) and an extractive summarizer (based on the PageRank algorithm). However, our adopted model is an integrated extractive-abstractive model that combines the Bert Extractive Summarizer with abstractive summarizers (t5-small, mBART-50). The integrated model overcomes the drawbacks of both extractive and abstractive summarization systems and draws on the strengths of each. The extractive summarizer shortens paragraphs exceeding 512 tokens, ensuring that no important information is lost before the abstractive models are applied. The abstractive summarizer then uses its pretrained knowledge to generate a context-based summary.</abstract>
<identifier type="citekey">rahman-etal-2024-cuet-sstm</identifier>
<location>
<url>https://aclanthology.org/2024.inlg-genchal.12</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>112</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CUET_SSTM at the GEM’24 Summarization Task: Integration of extractive and abstractive method for long text summarization in Swahili language
%A Rahman, Samia
%A Labib, Momtazul Arefin
%A Murad, Hasan
%A Das, Udoy
%Y Mille, Simon
%Y Clinciu, Miruna-Adriana
%S Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F rahman-etal-2024-cuet-sstm
%X Swahili, spoken by around 200 million people primarily in Tanzania and Kenya, has been the focus of our research for the GEM Shared Task at INLG’24 on Underrepresented Language Summarization. We have utilized the XLSUM dataset and manually summarized 1000 texts from a Swahili news classification dataset. To achieve the desired results, we have tested abstractive summarizers (mT5_multilingual_XLSum, t5-small, mBART-50) and an extractive summarizer (based on the PageRank algorithm). However, our adopted model is an integrated extractive-abstractive model that combines the Bert Extractive Summarizer with abstractive summarizers (t5-small, mBART-50). The integrated model overcomes the drawbacks of both extractive and abstractive summarization systems and draws on the strengths of each. The extractive summarizer shortens paragraphs exceeding 512 tokens, ensuring that no important information is lost before the abstractive models are applied. The abstractive summarizer then uses its pretrained knowledge to generate a context-based summary.
%U https://aclanthology.org/2024.inlg-genchal.12
%P 112-117
Markdown (Informal)
[CUET_SSTM at the GEM’24 Summarization Task: Integration of extractive and abstractive method for long text summarization in Swahili language](https://aclanthology.org/2024.inlg-genchal.12) (Rahman et al., INLG 2024)
ACL
Samia Rahman, Momtazul Arefin Labib, Hasan Murad, and Udoy Das. 2024. [CUET_SSTM at the GEM’24 Summarization Task: Integration of extractive and abstractive method for long text summarization in Swahili language](https://aclanthology.org/2024.inlg-genchal.12). In *Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges*, pages 112–117, Tokyo, Japan. Association for Computational Linguistics.
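
The abstract describes an extract-then-abstract pipeline: inputs longer than the abstractive models' 512-token limit are first shortened with the Bert Extractive Summarizer, and only then passed to an abstractive model. Below is a minimal sketch of that idea, assuming the `bert-extractive-summarizer` and `transformers` packages; the paper pairs the extractive step with t5-small or mBART-50, but the compression ratio and generation lengths here are illustrative choices, not the authors' tuned settings.

```python
# Sketch of the extract-then-abstract pipeline from the abstract.
# Assumes: pip install bert-extractive-summarizer transformers
# The ratio/length values are illustrative, not the paper's settings.
from summarizer import Summarizer            # Bert Extractive Summarizer
from transformers import AutoTokenizer, pipeline

MODEL = "t5-small"                           # one of the paper's abstractive models

tokenizer = AutoTokenizer.from_pretrained(MODEL)
abstractive = pipeline("summarization", model=MODEL, tokenizer=tokenizer)
extractive = Summarizer()                    # defaults to an English BERT checkpoint

def summarize(text: str, max_tokens: int = 512) -> str:
    # Shorten long inputs extractively first, so the abstractive model's
    # 512-token limit does not silently truncate salient sentences.
    if len(tokenizer.encode(text)) > max_tokens:
        text = extractive(text, ratio=0.4)   # illustrative compression ratio
    out = abstractive(text, max_length=128, min_length=30, truncation=True)
    return out[0]["summary_text"]
```

For Swahili input, a multilingual checkpoint could be passed to `Summarizer(...)` instead of the default English model; the default is kept here only to keep the sketch minimal.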