@inproceedings{aftar-etal-2024-robert2vectm,
title = "{R}o{BERT}2{V}ec{TM}: A Novel Approach for Topic Extraction in Islamic Studies",
author = "Aftar, Sania and
Gagliardelli, Luca and
Ganadi, Amina and
Ruozzi, Federico and
Bergamaschi, Sonia",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.534",
pages = "9148--9158",
abstract = "Investigating {``}Hadith{''} texts, crucial for theological studies and Islamic jurisprudence, presents challenges due to the linguistic complexity of Arabic, such as its complex morphology. In this paper, we propose an innovative approach to address the challenges of topic modeling in Hadith studies by utilizing the Contextualized Topic Model (CTM). Our study introduces RoBERT2VecTM, a novel neural-based approach that combines the RoBERTa transformer model with Doc2Vec, specifically targeting the semantic analysis of {``}Matn{''} (the actual content). The methodology outperforms many traditional state-of-the-art NLP models by generating more coherent and diverse Arabic topics. The diversity of the generated topics allows for further categorization, deepening the understanding of discussed concepts. Notably, our research highlights the critical impact of lemmatization and stopwords in enhancing topic modeling. This breakthrough marks a significant stride in applying NLP to non-Latin languages and opens new avenues for the nuanced analysis of complex religious texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aftar-etal-2024-robert2vectm">
<titleInfo>
<title>RoBERT2VecTM: A Novel Approach for Topic Extraction in Islamic Studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sania</namePart>
<namePart type="family">Aftar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Gagliardelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amina</namePart>
<namePart type="family">Ganadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Ruozzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonia</namePart>
<namePart type="family">Bergamaschi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Investigating “Hadith” texts, crucial for theological studies and Islamic jurisprudence, presents challenges due to the linguistic complexity of Arabic, such as its complex morphology. In this paper, we propose an innovative approach to address the challenges of topic modeling in Hadith studies by utilizing the Contextualized Topic Model (CTM). Our study introduces RoBERT2VecTM, a novel neural-based approach that combines the RoBERTa transformer model with Doc2Vec, specifically targeting the semantic analysis of “Matn” (the actual content). The methodology outperforms many traditional state-of-the-art NLP models by generating more coherent and diverse Arabic topics. The diversity of the generated topics allows for further categorization, deepening the understanding of discussed concepts. Notably, our research highlights the critical impact of lemmatization and stopwords in enhancing topic modeling. This breakthrough marks a significant stride in applying NLP to non-Latin languages and opens new avenues for the nuanced analysis of complex religious texts.</abstract>
<identifier type="citekey">aftar-etal-2024-robert2vectm</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.534</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>9148</start>
<end>9158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RoBERT2VecTM: A Novel Approach for Topic Extraction in Islamic Studies
%A Aftar, Sania
%A Gagliardelli, Luca
%A Ganadi, Amina
%A Ruozzi, Federico
%A Bergamaschi, Sonia
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F aftar-etal-2024-robert2vectm
%X Investigating “Hadith” texts, crucial for theological studies and Islamic jurisprudence, presents challenges due to the linguistic complexity of Arabic, such as its complex morphology. In this paper, we propose an innovative approach to address the challenges of topic modeling in Hadith studies by utilizing the Contextualized Topic Model (CTM). Our study introduces RoBERT2VecTM, a novel neural-based approach that combines the RoBERTa transformer model with Doc2Vec, specifically targeting the semantic analysis of “Matn” (the actual content). The methodology outperforms many traditional state-of-the-art NLP models by generating more coherent and diverse Arabic topics. The diversity of the generated topics allows for further categorization, deepening the understanding of discussed concepts. Notably, our research highlights the critical impact of lemmatization and stopwords in enhancing topic modeling. This breakthrough marks a significant stride in applying NLP to non-Latin languages and opens new avenues for the nuanced analysis of complex religious texts.
%U https://aclanthology.org/2024.findings-emnlp.534
%P 9148-9158
Markdown (Informal)
[RoBERT2VecTM: A Novel Approach for Topic Extraction in Islamic Studies](https://aclanthology.org/2024.findings-emnlp.534) (Aftar et al., Findings 2024)
ACL