@inproceedings{alqurashi-etal-2025-bert,
title = "{BERT}-based Classical {A}rabic Poetry Authorship Attribution",
author = "Alqurashi, Lama and
Sharoff, Serge and
Watson, Janet and
Blakesley, Jacob",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.409/",
pages = "6105--6119",
abstract = "This study introduces a novel computational approach to authorship attribution (AA) in Arabic poetry, using the entire Classical Arabic Poetry corpus for the first time and offering a direct analysis of real cases of misattribution. AA in Arabic poetry has been a significant issue since the 9th century, particularly due to the loss of pre-Islamic poetry and the misattribution of post-Islamic works to earlier poets. While previous research has predominantly employed qualitative methods, this study uses computational techniques to address these challenges. The corpus was scraped from online sources and enriched with manually curated Date of Death (DoD) information to overcome the problematic traditional sectioning. Additionally, we applied Embedded Topic Modeling (ETM) to label each poem with its topic contributions, further enhancing the dataset's value. An ensemble model based on CAMeLBERT was developed and tested across three dimensions: topic, number of poets, and number of training examples. After parameter optimization, the model achieved F1 scores ranging from 0.97 to 1.0. The model was also applied to four pre-Islamic misattribution cases, producing results consistent with historical and literary studies."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alqurashi-etal-2025-bert">
<titleInfo>
<title>BERT-based Classical Arabic Poetry Authorship Attribution</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lama</namePart>
<namePart type="family">Alqurashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janet</namePart>
<namePart type="family">Watson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="family">Blakesley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study introduces a novel computational approach to authorship attribution (AA) in Arabic poetry, using the entire Classical Arabic Poetry corpus for the first time and offering a direct analysis of real cases of misattribution. AA in Arabic poetry has been a significant issue since the 9th century, particularly due to the loss of pre-Islamic poetry and the misattribution of post-Islamic works to earlier poets. While previous research has predominantly employed qualitative methods, this study uses computational techniques to address these challenges. The corpus was scraped from online sources and enriched with manually curated Date of Death (DoD) information to overcome the problematic traditional sectioning. Additionally, we applied Embedded Topic Modeling (ETM) to label each poem with its topic contributions, further enhancing the dataset's value. An ensemble model based on CAMeLBERT was developed and tested across three dimensions: topic, number of poets, and number of training examples. After parameter optimization, the model achieved F1 scores ranging from 0.97 to 1.0. The model was also applied to four pre-Islamic misattribution cases, producing results consistent with historical and literary studies.</abstract>
<identifier type="citekey">alqurashi-etal-2025-bert</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.409/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>6105</start>
<end>6119</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERT-based Classical Arabic Poetry Authorship Attribution
%A Alqurashi, Lama
%A Sharoff, Serge
%A Watson, Janet
%A Blakesley, Jacob
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F alqurashi-etal-2025-bert
%X This study introduces a novel computational approach to authorship attribution (AA) in Arabic poetry, using the entire Classical Arabic Poetry corpus for the first time and offering a direct analysis of real cases of misattribution. AA in Arabic poetry has been a significant issue since the 9th century, particularly due to the loss of pre-Islamic poetry and the misattribution of post-Islamic works to earlier poets. While previous research has predominantly employed qualitative methods, this study uses computational techniques to address these challenges. The corpus was scraped from online sources and enriched with manually curated Date of Death (DoD) information to overcome the problematic traditional sectioning. Additionally, we applied Embedded Topic Modeling (ETM) to label each poem with its topic contributions, further enhancing the dataset's value. An ensemble model based on CAMeLBERT was developed and tested across three dimensions: topic, number of poets, and number of training examples. After parameter optimization, the model achieved F1 scores ranging from 0.97 to 1.0. The model was also applied to four pre-Islamic misattribution cases, producing results consistent with historical and literary studies.
%U https://aclanthology.org/2025.coling-main.409/
%P 6105-6119
Markdown (Informal)
[BERT-based Classical Arabic Poetry Authorship Attribution](https://aclanthology.org/2025.coling-main.409/) (Alqurashi et al., COLING 2025)
ACL
- Lama Alqurashi, Serge Sharoff, Janet Watson, and Jacob Blakesley. 2025. BERT-based Classical Arabic Poetry Authorship Attribution. In Proceedings of the 31st International Conference on Computational Linguistics, pages 6105–6119, Abu Dhabi, UAE. Association for Computational Linguistics.