% Conference paper record (ACL Anthology id 2024.arabicnlp-1.29).
% Proper nouns in title/booktitle are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{ferhat-etal-2024-functional,
    title     = {Functional Text Dimensions for {Arabic} Text Classification},
    author    = {Ferhat, Zeyd and
                 Betka, Abir and
                 Barka, Riyadh and
                 Kahhoul, Zineddine and
                 Boutiba, Selma and
                 Tiar, Mohamed and
                 Dahmani, Habiba and
                 Abdelali, Ahmed},
    editor    = {Habash, Nizar and
                 Bouamor, Houda and
                 Eskander, Ramy and
                 Tomeh, Nadi and
                 Abu Farha, Ibrahim and
                 Abdelali, Ahmed and
                 Touileb, Samia and
                 Hamed, Injy and
                 Onaizan, Yaser and
                 Alhafni, Bashar and
                 Antoun, Wissam and
                 Khalifa, Salam and
                 Haddad, Hatem and
                 Zitouni, Imed and
                 AlKhamissi, Badr and
                 Almatham, Rawan and
                 Mrini, Khalil},
    booktitle = {Proceedings of The Second {Arabic} Natural Language Processing Conference},
    month     = aug,
    year      = {2024},
    address   = {Bangkok, Thailand},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.arabicnlp-1.29},
    pages     = {352--360},
    abstract  = {Text classification is of paramount importance in a wide range of applications, including information retrieval, extraction and sentiment analysis. The challenge of classifying and labelling text genres, especially in web-based corpora, has received considerable attention. The frequent absence of unambiguous genre information complicates the identification of text types. To address these issues, the Functional Text Dimensions (FTD) method has been introduced to provide a universal set of categories for text classification. This study presents the Arabic Functional Text Dimensions Corpus (AFTD Corpus), a carefully curated collection of documents for evaluating text classification in Arabic. The AFTD Corpus which we are making available to the community, consists of 3400 documents spanning 17 different class categories. Through a comprehensive evaluation using traditional machine learning and neural models, we assess the effectiveness of the FTD approach in the Arabic context. CAMeLBERT, a state-of-the-art model, achieved an impressive F1 score of 0.81 on our corpus. This research highlights the potential of the FTD method for improving text classification, especially for Arabic content, and underlines the importance of robust classification models in web applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation key ferhat-etal-2024-functional. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ferhat-etal-2024-functional">
    <titleInfo>
      <title>Functional Text Dimensions for Arabic Text Classification</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Zeyd</namePart>
      <namePart type="family">Ferhat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abir</namePart>
      <namePart type="family">Betka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Riyadh</namePart>
      <namePart type="family">Barka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zineddine</namePart>
      <namePart type="family">Kahhoul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Selma</namePart>
      <namePart type="family">Boutiba</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohamed</namePart>
      <namePart type="family">Tiar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Habiba</namePart>
      <namePart type="family">Dahmani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ahmed</namePart>
      <namePart type="family">Abdelali</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publication details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of The Second Arabic Natural Language Processing Conference</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nizar</namePart>
        <namePart type="family">Habash</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ramy</namePart>
        <namePart type="family">Eskander</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nadi</namePart>
        <namePart type="family">Tomeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ibrahim</namePart>
        <namePart type="family">Abu Farha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ahmed</namePart>
        <namePart type="family">Abdelali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Samia</namePart>
        <namePart type="family">Touileb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Injy</namePart>
        <namePart type="family">Hamed</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bashar</namePart>
        <namePart type="family">Alhafni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wissam</namePart>
        <namePart type="family">Antoun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salam</namePart>
        <namePart type="family">Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hatem</namePart>
        <namePart type="family">Haddad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Imed</namePart>
        <namePart type="family">Zitouni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Badr</namePart>
        <namePart type="family">AlKhamissi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rawan</namePart>
        <namePart type="family">Almatham</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalil</namePart>
        <namePart type="family">Mrini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Text classification is of paramount importance in a wide range of applications, including information retrieval, extraction and sentiment analysis. The challenge of classifying and labelling text genres, especially in web-based corpora, has received considerable attention. The frequent absence of unambiguous genre information complicates the identification of text types. To address these issues, the Functional Text Dimensions (FTD) method has been introduced to provide a universal set of categories for text classification. This study presents the Arabic Functional Text Dimensions Corpus (AFTD Corpus), a carefully curated collection of documents for evaluating text classification in Arabic. The AFTD Corpus which we are making available to the community, consists of 3400 documents spanning 17 different class categories. Through a comprehensive evaluation using traditional machine learning and neural models, we assess the effectiveness of the FTD approach in the Arabic context. CAMeLBERT, a state-of-the-art model, achieved an impressive F1 score of 0.81 on our corpus. This research highlights the potential of the FTD method for improving text classification, especially for Arabic content, and underlines the importance of robust classification models in web applications.</abstract>
    <identifier type="citekey">ferhat-etal-2024-functional</identifier>
    <location>
      <url>https://aclanthology.org/2024.arabicnlp-1.29</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>352</start>
        <end>360</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Functional Text Dimensions for Arabic Text Classification
%A Ferhat, Zeyd
%A Betka, Abir
%A Barka, Riyadh
%A Kahhoul, Zineddine
%A Boutiba, Selma
%A Tiar, Mohamed
%A Dahmani, Habiba
%A Abdelali, Ahmed
%Y Habash, Nizar
%Y Bouamor, Houda
%Y Eskander, Ramy
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Abdelali, Ahmed
%Y Touileb, Samia
%Y Hamed, Injy
%Y Onaizan, Yaser
%Y Alhafni, Bashar
%Y Antoun, Wissam
%Y Khalifa, Salam
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y AlKhamissi, Badr
%Y Almatham, Rawan
%Y Mrini, Khalil
%S Proceedings of The Second Arabic Natural Language Processing Conference
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F ferhat-etal-2024-functional
%X Text classification is of paramount importance in a wide range of applications, including information retrieval, extraction and sentiment analysis. The challenge of classifying and labelling text genres, especially in web-based corpora, has received considerable attention. The frequent absence of unambiguous genre information complicates the identification of text types. To address these issues, the Functional Text Dimensions (FTD) method has been introduced to provide a universal set of categories for text classification. This study presents the Arabic Functional Text Dimensions Corpus (AFTD Corpus), a carefully curated collection of documents for evaluating text classification in Arabic. The AFTD Corpus which we are making available to the community, consists of 3400 documents spanning 17 different class categories. Through a comprehensive evaluation using traditional machine learning and neural models, we assess the effectiveness of the FTD approach in the Arabic context. CAMeLBERT, a state-of-the-art model, achieved an impressive F1 score of 0.81 on our corpus. This research highlights the potential of the FTD method for improving text classification, especially for Arabic content, and underlines the importance of robust classification models in web applications.
%U https://aclanthology.org/2024.arabicnlp-1.29
%P 352-360
Markdown (Informal)
[Functional Text Dimensions for Arabic Text Classification](https://aclanthology.org/2024.arabicnlp-1.29) (Ferhat et al., ArabicNLP-WS 2024)
ACL
- Zeyd Ferhat, Abir Betka, Riyadh Barka, Zineddine Kahhoul, Selma Boutiba, Mohamed Tiar, Habiba Dahmani, and Ahmed Abdelali. 2024. Functional Text Dimensions for Arabic Text Classification. In Proceedings of The Second Arabic Natural Language Processing Conference, pages 352–360, Bangkok, Thailand. Association for Computational Linguistics.