BibTeX
@inproceedings{faheem-etal-2024-urdumasd,
title = "{U}rdu{MASD}: A Multimodal Abstractive Summarization Dataset for {U}rdu",
author = "Faheem, Ali and
Ullah, Faizad and
Ayub, Muhammad Sohaib and
Karim, Asim",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1498",
pages = "17245--17253",
abstract = "In this era of multimedia dominance, the surge of multimodal content on social media has transformed our methods of communication and information exchange. With the widespread use of multimedia content, the ability to effectively summarize this multimodal content is crucial for enhancing consumption, searchability, and retrieval. The scarcity of such training datasets has been a barrier to research in this area, especially for low-resource languages like Urdu. To address this gap, this paper introduces {``}UrduMASD{''}, a video-based Urdu multimodal abstractive text summarization dataset. The dataset contains 15,374 collections of videos, audio, titles, transcripts, and corresponding text summaries. To ensure the quality of the dataset, intrinsic evaluation metrics such as Abstractivity, Compression, Redundancy, and Semantic coherence have been employed. It was observed that our dataset surpasses existing datasets on numerous key quality metrics. Additionally, we present baseline results achieved using both text-based and state-of-the-art multimodal summarization models. On adding visual information, an improvement of 2.6{\%} was observed in the ROUGE scores, highlighting the efficacy of utilizing multimodal inputs for summarization. To the best of our knowledge, this is the first dataset in Urdu that provides video-based multimodal data for abstractive text summarization, making it a valuable resource for advancing research in this field.",
}
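As a quick sanity check, the entry above can be loaded programmatically. A minimal sketch, assuming the third-party `bibtexparser` package (v1 API) is installed and the entry is saved to a hypothetical file `faheem-etal-2024-urdumasd.bib`:

```python
# Minimal sketch: parse the BibTeX entry above and print a few fields.
# Assumes `pip install bibtexparser` (v1 API); the filename is hypothetical.
import bibtexparser

with open("faheem-etal-2024-urdumasd.bib") as f:
    db = bibtexparser.load(f)

entry = db.entries[0]          # fields become lowercase dict keys
print(entry["ID"])             # faheem-etal-2024-urdumasd
print(entry["title"])          # {U}rdu{MASD}: A Multimodal ...
print(entry["pages"])          # 17245--17253
```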
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="faheem-etal-2024-urdumasd">
<titleInfo>
<title>UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Faheem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faizad</namePart>
<namePart type="family">Ullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="given">Sohaib</namePart>
<namePart type="family">Ayub</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asim</namePart>
<namePart type="family">Karim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this era of multimedia dominance, the surge of multimodal content on social media has transformed our methods of communication and information exchange. With the widespread use of multimedia content, the ability to effectively summarize this multimodal content is crucial for enhancing consumption, searchability, and retrieval. The scarcity of such training datasets has been a barrier to research in this area, especially for low-resource languages like Urdu. To address this gap, this paper introduces “UrduMASD”, a video-based Urdu multimodal abstractive text summarization dataset. The dataset contains 15,374 collections of videos, audio, titles, transcripts, and corresponding text summaries. To ensure the quality of the dataset, intrinsic evaluation metrics such as Abstractivity, Compression, Redundancy, and Semantic coherence have been employed. It was observed that our dataset surpasses existing datasets on numerous key quality metrics. Additionally, we present baseline results achieved using both text-based and state-of-the-art multimodal summarization models. On adding visual information, an improvement of 2.6% was observed in the ROUGE scores, highlighting the efficacy of utilizing multimodal inputs for summarization. To the best of our knowledge, this is the first dataset in Urdu that provides video-based multimodal data for abstractive text summarization, making it a valuable resource for advancing research in this field.</abstract>
<identifier type="citekey">faheem-etal-2024-urdumasd</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1498</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>17245</start>
<end>17253</end>
</extent>
</part>
</mods>
</modsCollection>
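The MODS record is namespaced XML, so extracting fields needs the namespace declared above. A minimal sketch using only the Python standard library; the filename is hypothetical:

```python
# Minimal sketch: pull the title and author names out of the MODS record.
import xml.etree.ElementTree as ET

NS = {"m": "http://www.loc.gov/mods/v3"}  # namespace declared on <modsCollection>

tree = ET.parse("faheem-etal-2024-urdumasd.xml")  # hypothetical filename
mods = tree.getroot().find("m:mods", NS)

title = mods.find("m:titleInfo/m:title", NS).text
# findall on "m:name" matches direct children only, so the editors nested
# under <relatedItem> are not picked up here.
authors = [
    " ".join(part.text for part in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
    if name.find("m:role/m:roleTerm", NS).text == "author"
]
print(title)    # UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu
print(authors)  # ['Ali Faheem', 'Faizad Ullah', 'Muhammad Sohaib Ayub', 'Asim Karim']
```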
Endnote
%0 Conference Proceedings
%T UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu
%A Faheem, Ali
%A Ullah, Faizad
%A Ayub, Muhammad Sohaib
%A Karim, Asim
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F faheem-etal-2024-urdumasd
%X In this era of multimedia dominance, the surge of multimodal content on social media has transformed our methods of communication and information exchange. With the widespread use of multimedia content, the ability to effectively summarize this multimodal content is crucial for enhancing consumption, searchability, and retrieval. The scarcity of such training datasets has been a barrier to research in this area, especially for low-resource languages like Urdu. To address this gap, this paper introduces “UrduMASD”, a video-based Urdu multimodal abstractive text summarization dataset. The dataset contains 15,374 collections of videos, audio, titles, transcripts, and corresponding text summaries. To ensure the quality of the dataset, intrinsic evaluation metrics such as Abstractivity, Compression, Redundancy, and Semantic coherence have been employed. It was observed that our dataset surpasses existing datasets on numerous key quality metrics. Additionally, we present baseline results achieved using both text-based and state-of-the-art multimodal summarization models. On adding visual information, an improvement of 2.6% was observed in the ROUGE scores, highlighting the efficacy of utilizing multimodal inputs for summarization. To the best of our knowledge, this is the first dataset in Urdu that provides video-based multimodal data for abstractive text summarization, making it a valuable resource for advancing research in this field.
%U https://aclanthology.org/2024.lrec-main.1498
%P 17245-17253
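The Endnote record above uses the line-oriented refer format: each line is a `%` tag followed by a value, with repeatable tags (`%A` authors, `%Y` editors). A minimal hand-rolled parser, sketched under the assumption that the record is saved to a hypothetical text file:

```python
# Minimal sketch: parse the %-tagged refer/Endnote record into a dict,
# collecting repeatable tags such as %A and %Y into lists.
def parse_refer(text: str) -> dict:
    record: dict = {}
    for line in text.splitlines():
        if not line.startswith("%"):
            continue
        tag, _, value = line.partition(" ")
        record.setdefault(tag, []).append(value)
    return record

with open("faheem-etal-2024-urdumasd.txt") as f:  # hypothetical filename
    rec = parse_refer(f.read())

print(rec["%T"][0])  # UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu
print(rec["%A"])     # ['Faheem, Ali', 'Ullah, Faizad', 'Ayub, Muhammad Sohaib', 'Karim, Asim']
```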
Markdown (Informal)
[UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu](https://aclanthology.org/2024.lrec-main.1498) (Faheem et al., LREC-COLING 2024)
ACL
Ali Faheem, Faizad Ullah, Muhammad Sohaib Ayub, and Asim Karim. 2024. UrduMASD: A Multimodal Abstractive Summarization Dataset for Urdu. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 17245–17253, Torino, Italia. ELRA and ICCL.