@inproceedings{kahla-etal-2021-cross,
title = "Cross-lingual Fine-tuning for Abstractive {A}rabic Text Summarization",
author = "Kahla, Mram and
Yang, Zijian Gy{\H{o}}z{\H{o}} and
Nov{\'a}k, Attila",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.74",
pages = "655--663",
abstract = "While abstractive summarization in certain languages, like English, has already reached fairly good results due to the availability of trend-setting resources, like the CNN/Daily Mail dataset, and considerable progress in generative neural models, progress in abstractive summarization for Arabic, the fifth most-spoken language globally, is still in baby shoes. While some resources for extractive summarization have been available for some time, in this paper, we present the first corpus of human-written abstractive news summaries in Arabic, hoping to lay the foundation of this line of research for this important language. The dataset consists of more than 21 thousand items. We used this dataset to train a set of neural abstractive summarization systems for Arabic by fine-tuning pre-trained language models such as multilingual BERT, AraBERT, and multilingual BART-50. As the Arabic dataset is much smaller than e.g. the CNN/Daily Mail dataset, we also applied cross-lingual knowledge transfer to significantly improve the performance of our baseline systems. The setups included two M-BERT-based summarization models originally trained for Hungarian/English and a similar system based on M-BART-50 originally trained for Russian that were further fine-tuned for Arabic. Evaluation of the models was performed in terms of ROUGE, and a manual evaluation of fluency and adequacy of the models was also performed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kahla-etal-2021-cross">
<titleInfo>
<title>Cross-lingual Fine-tuning for Abstractive Arabic Text Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mram</namePart>
<namePart type="family">Kahla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zijian</namePart>
<namePart type="given">Győző</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Attila</namePart>
<namePart type="family">Novák</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While abstractive summarization in certain languages, like English, has already reached fairly good results due to the availability of trend-setting resources, like the CNN/Daily Mail dataset, and considerable progress in generative neural models, progress in abstractive summarization for Arabic, the fifth most-spoken language globally, is still in baby shoes. While some resources for extractive summarization have been available for some time, in this paper, we present the first corpus of human-written abstractive news summaries in Arabic, hoping to lay the foundation of this line of research for this important language. The dataset consists of more than 21 thousand items. We used this dataset to train a set of neural abstractive summarization systems for Arabic by fine-tuning pre-trained language models such as multilingual BERT, AraBERT, and multilingual BART-50. As the Arabic dataset is much smaller than e.g. the CNN/Daily Mail dataset, we also applied cross-lingual knowledge transfer to significantly improve the performance of our baseline systems. The setups included two M-BERT-based summarization models originally trained for Hungarian/English and a similar system based on M-BART-50 originally trained for Russian that were further fine-tuned for Arabic. Evaluation of the models was performed in terms of ROUGE, and a manual evaluation of fluency and adequacy of the models was also performed.</abstract>
<identifier type="citekey">kahla-etal-2021-cross</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.74</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>655</start>
<end>663</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-lingual Fine-tuning for Abstractive Arabic Text Summarization
%A Kahla, Mram
%A Yang, Zijian Győző
%A Novák, Attila
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F kahla-etal-2021-cross
%X While abstractive summarization in certain languages, like English, has already reached fairly good results due to the availability of trend-setting resources, like the CNN/Daily Mail dataset, and considerable progress in generative neural models, progress in abstractive summarization for Arabic, the fifth most-spoken language globally, is still in baby shoes. While some resources for extractive summarization have been available for some time, in this paper, we present the first corpus of human-written abstractive news summaries in Arabic, hoping to lay the foundation of this line of research for this important language. The dataset consists of more than 21 thousand items. We used this dataset to train a set of neural abstractive summarization systems for Arabic by fine-tuning pre-trained language models such as multilingual BERT, AraBERT, and multilingual BART-50. As the Arabic dataset is much smaller than e.g. the CNN/Daily Mail dataset, we also applied cross-lingual knowledge transfer to significantly improve the performance of our baseline systems. The setups included two M-BERT-based summarization models originally trained for Hungarian/English and a similar system based on M-BART-50 originally trained for Russian that were further fine-tuned for Arabic. Evaluation of the models was performed in terms of ROUGE, and a manual evaluation of fluency and adequacy of the models was also performed.
%U https://aclanthology.org/2021.ranlp-1.74
%P 655-663
Markdown (Informal)
[Cross-lingual Fine-tuning for Abstractive Arabic Text Summarization](https://aclanthology.org/2021.ranlp-1.74) (Kahla et al., RANLP 2021)
ACL