@inproceedings{raychawdhary-etal-2023-seals,
title = "{S}eals{\_}{L}ab at {S}em{E}val-2023 Task 12: Sentiment Analysis for Low-resource {A}frican Languages, {H}ausa and {I}gbo",
author = "Raychawdhary, Nilanjana and
Das, Amit and
Dozier, Gerry and
D. Seals, Cheryl",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.208",
doi = "10.18653/v1/2023.semeval-1.208",
pages = "1508--1517",
abstract = "One of the most extensively researched applications in natural language processing (NLP) is sentiment analysis. While the majority of the study focuses on high-resource languages (e.g., English), this research will focus on low-resource African languages namely Igbo and Hausa. The annotated tweets of both languages have a significant number of code-mixed tweets. The curated datasets necessary to build complex AI applications are not available for the majority of African languages. To optimize the use of such datasets, research is needed to determine the viability of present NLP procedures as well as the development of novel techniques. This paper outlines our efforts to develop a sentiment analysis (for positive and negative as well as neutral) system for tweets from the Hausa, and Igbo languages. Sentiment analysis can computationally analyze and discover sentiments in a text or document. We worked on the first thorough compilation of AfriSenti-SemEval 2023 Shared Task 12 Twitter datasets that are human-annotated for the most widely spoken languages in Nigeria, such as Hausa and Igbo. Here we trained the modern pre-trained language model AfriBERTa large on the AfriSenti-SemEval Shared Task 12 Twitter dataset to create sentiment classification. In particular, the results demonstrate that our model trained on AfriSenti-SemEval Shared Task 12 datasets and produced with an F1 score of 80.85{\%} for Hausa and 80.82{\%} for Igbo languages on the sentiment analysis test. In AfriSenti-SemEval 2023 shared task 12 (Task A), we consistently ranked top 10 by achieving a mean F1 score of more than 80{\%} for both the Hausa and Igbo languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="raychawdhary-etal-2023-seals">
<titleInfo>
<title>Seals_Lab at SemEval-2023 Task 12: Sentiment Analysis for Low-resource African Languages, Hausa and Igbo</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nilanjana</namePart>
<namePart type="family">Raychawdhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amit</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerry</namePart>
<namePart type="family">Dozier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheryl</namePart>
<namePart type="family">D. Seals</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the most extensively researched applications in natural language processing (NLP) is sentiment analysis. While the majority of the study focuses on high-resource languages (e.g., English), this research will focus on low-resource African languages namely Igbo and Hausa. The annotated tweets of both languages have a significant number of code-mixed tweets. The curated datasets necessary to build complex AI applications are not available for the majority of African languages. To optimize the use of such datasets, research is needed to determine the viability of present NLP procedures as well as the development of novel techniques. This paper outlines our efforts to develop a sentiment analysis (for positive and negative as well as neutral) system for tweets from the Hausa, and Igbo languages. Sentiment analysis can computationally analyze and discover sentiments in a text or document. We worked on the first thorough compilation of AfriSenti-SemEval 2023 Shared Task 12 Twitter datasets that are human-annotated for the most widely spoken languages in Nigeria, such as Hausa and Igbo. Here we trained the modern pre-trained language model AfriBERTa large on the AfriSenti-SemEval Shared Task 12 Twitter dataset to create sentiment classification. In particular, the results demonstrate that our model trained on AfriSenti-SemEval Shared Task 12 datasets and produced with an F1 score of 80.85% for Hausa and 80.82% for Igbo languages on the sentiment analysis test. In AfriSenti-SemEval 2023 shared task 12 (Task A), we consistently ranked top 10 by achieving a mean F1 score of more than 80% for both the Hausa and Igbo languages.</abstract>
<identifier type="citekey">raychawdhary-etal-2023-seals</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.208</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.208</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>1508</start>
<end>1517</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Seals_Lab at SemEval-2023 Task 12: Sentiment Analysis for Low-resource African Languages, Hausa and Igbo
%A Raychawdhary, Nilanjana
%A Das, Amit
%A Dozier, Gerry
%A D. Seals, Cheryl
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F raychawdhary-etal-2023-seals
%X One of the most extensively researched applications in natural language processing (NLP) is sentiment analysis. While the majority of the study focuses on high-resource languages (e.g., English), this research will focus on low-resource African languages namely Igbo and Hausa. The annotated tweets of both languages have a significant number of code-mixed tweets. The curated datasets necessary to build complex AI applications are not available for the majority of African languages. To optimize the use of such datasets, research is needed to determine the viability of present NLP procedures as well as the development of novel techniques. This paper outlines our efforts to develop a sentiment analysis (for positive and negative as well as neutral) system for tweets from the Hausa, and Igbo languages. Sentiment analysis can computationally analyze and discover sentiments in a text or document. We worked on the first thorough compilation of AfriSenti-SemEval 2023 Shared Task 12 Twitter datasets that are human-annotated for the most widely spoken languages in Nigeria, such as Hausa and Igbo. Here we trained the modern pre-trained language model AfriBERTa large on the AfriSenti-SemEval Shared Task 12 Twitter dataset to create sentiment classification. In particular, the results demonstrate that our model trained on AfriSenti-SemEval Shared Task 12 datasets and produced with an F1 score of 80.85% for Hausa and 80.82% for Igbo languages on the sentiment analysis test. In AfriSenti-SemEval 2023 shared task 12 (Task A), we consistently ranked top 10 by achieving a mean F1 score of more than 80% for both the Hausa and Igbo languages.
%R 10.18653/v1/2023.semeval-1.208
%U https://aclanthology.org/2023.semeval-1.208
%U https://doi.org/10.18653/v1/2023.semeval-1.208
%P 1508-1517
Markdown (Informal)
[Seals_Lab at SemEval-2023 Task 12: Sentiment Analysis for Low-resource African Languages, Hausa and Igbo](https://aclanthology.org/2023.semeval-1.208) (Raychawdhary et al., SemEval 2023)
ACL