@inproceedings{ayele-etal-2023-exploring,
title = "Exploring {A}mharic Hate Speech Data Collection and Classification Approaches",
author = "Ayele, Abinew Ali and
Yimam, Seid Muhie and
Belay, Tadesse Destaw and
Asfaw, Tesfa and
Biemann, Chris",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing",
month = sep,
year = "2023",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2023.ranlp-1.6",
pages = "49--59",
abstract = "In this paper, we present a study of efficient data selection and annotation strategies for Amharic hate speech. We also build various classification models and investigate the challenges of hate speech data selection, annotation, and classification for the Amharic language. From a total of over 18 million tweets in our Twitter corpus, 15.1k tweets are annotated by two independent native speakers, and a Cohen{'}s kappa score of 0.48 is achieved. A third annotator, a curator, is also employed to decide on the final gold labels. We employ both classical machine learning and deep learning approaches, which include fine-tuning AmFLAIR and AmRoBERTa contextual embedding models. Among all the models, AmFLAIR achieves the best performance with an F1-score of 72{\%}. We publicly release the annotation guidelines, keywords/lexicon entries, datasets, models, and associated scripts with a permissive license.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ayele-etal-2023-exploring">
<titleInfo>
<title>Exploring Amharic Hate Speech Data Collection and Classification Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abinew</namePart>
<namePart type="given">Ali</namePart>
<namePart type="family">Ayele</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="given">Muhie</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tadesse</namePart>
<namePart type="given">Destaw</namePart>
<namePart type="family">Belay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tesfa</namePart>
<namePart type="family">Asfaw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present a study of efficient data selection and annotation strategies for Amharic hate speech. We also build various classification models and investigate the challenges of hate speech data selection, annotation, and classification for the Amharic language. From a total of over 18 million tweets in our Twitter corpus, 15.1k tweets are annotated by two independent native speakers, and a Cohen’s kappa score of 0.48 is achieved. A third annotator, a curator, is also employed to decide on the final gold labels. We employ both classical machine learning and deep learning approaches, which include fine-tuning AmFLAIR and AmRoBERTa contextual embedding models. Among all the models, AmFLAIR achieves the best performance with an F1-score of 72%. We publicly release the annotation guidelines, keywords/lexicon entries, datasets, models, and associated scripts with a permissive license.</abstract>
<identifier type="citekey">ayele-etal-2023-exploring</identifier>
<location>
<url>https://aclanthology.org/2023.ranlp-1.6</url>
</location>
<part>
<date>2023-09</date>
<extent unit="page">
<start>49</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Amharic Hate Speech Data Collection and Classification Approaches
%A Ayele, Abinew Ali
%A Yimam, Seid Muhie
%A Belay, Tadesse Destaw
%A Asfaw, Tesfa
%A Biemann, Chris
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the 14th International Conference on Recent Advances in Natural Language Processing
%D 2023
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F ayele-etal-2023-exploring
%X In this paper, we present a study of efficient data selection and annotation strategies for Amharic hate speech. We also build various classification models and investigate the challenges of hate speech data selection, annotation, and classification for the Amharic language. From a total of over 18 million tweets in our Twitter corpus, 15.1k tweets are annotated by two independent native speakers, and a Cohen’s kappa score of 0.48 is achieved. A third annotator, a curator, is also employed to decide on the final gold labels. We employ both classical machine learning and deep learning approaches, which include fine-tuning AmFLAIR and AmRoBERTa contextual embedding models. Among all the models, AmFLAIR achieves the best performance with an F1-score of 72%. We publicly release the annotation guidelines, keywords/lexicon entries, datasets, models, and associated scripts with a permissive license.
%U https://aclanthology.org/2023.ranlp-1.6
%P 49-59
Markdown (Informal)
[Exploring Amharic Hate Speech Data Collection and Classification Approaches](https://aclanthology.org/2023.ranlp-1.6) (Ayele et al., RANLP 2023)
ACL