% Conference paper in the CLIB 2022 proceedings; canonical landing page is given in the url field.
@inproceedings{ralev-pfeffer-2022-hate,
  title     = {Hate Speech Classification in {Bulgarian}},
  author    = {Ralev, Radoslav and
               Pfeffer, J{\"u}rgen},
  booktitle = {Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria ({CLIB} 2022)},
  month     = sep,
  year      = {2022},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, IBL -- BAS},
  url       = {https://aclanthology.org/2022.clib-1.5},
  pages     = {49--58},
  abstract  = {In recent years, we have seen a surge in the propagation of online hate speech on social media platforms. According to a multitude of sources such as the European Council, hate speech can lead to acts of violence and conflict on a broader scale. That has led to increased awareness by governments, companies, and the scientific community, and although the field is relatively new, there have been considerable advancements in the field as a result of the collective effort. Despite the increasingly better results, most of the research focuses on the more popular languages (i.e., English, German, or Arabic), whereas less popular languages such as Bulgarian and other Balkan languages have been neglected. We have aggregated a real-world dataset from Bulgarian online forums and manually annotated 108,142 sentences. About 1.74{\%} of which can be described with the categories racism, sexism, rudeness, and profanity. We then developed and evaluated various classifiers on the dataset and found that a support vector machine with a linear kernel trained on character-level TF-IDF features is the best model. Our work can be seen as another piece in the puzzle to building a strong foundation for future work on hate speech classification in Bulgarian.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the conference paper with citekey ralev-pfeffer-2022-hate. -->
  <mods ID="ralev-pfeffer-2022-hate">
    <titleInfo>
      <title>Hate Speech Classification in Bulgarian</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Radoslav</namePart>
      <namePart type="family">Ralev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jürgen</namePart>
      <namePart type="family">Pfeffer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022)</title>
      </titleInfo>
      <originInfo>
        <publisher>Department of Computational Linguistics, IBL – BAS</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In recent years, we have seen a surge in the propagation of online hate speech on social media platforms. According to a multitude of sources such as the European Council, hate speech can lead to acts of violence and conflict on a broader scale. That has led to increased awareness by governments, companies, and the scientific community, and although the field is relatively new, there have been considerable advancements in the field as a result of the collective effort. Despite the increasingly better results, most of the research focuses on the more popular languages (i.e., English, German, or Arabic), whereas less popular languages such as Bulgarian and other Balkan languages have been neglected. We have aggregated a real-world dataset from Bulgarian online forums and manually annotated 108,142 sentences. About 1.74% of which can be described with the categories racism, sexism, rudeness, and profanity. We then developed and evaluated various classifiers on the dataset and found that a support vector machine with a linear kernel trained on character-level TF-IDF features is the best model. Our work can be seen as another piece in the puzzle to building a strong foundation for future work on hate speech classification in Bulgarian.</abstract>
    <identifier type="citekey">ralev-pfeffer-2022-hate</identifier>
    <location>
      <url>https://aclanthology.org/2022.clib-1.5</url>
    </location>
    <!-- Page range of the paper within the host proceedings. -->
    <part>
      <date>2022-09</date>
      <extent unit="page">
        <start>49</start>
        <end>58</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Hate Speech Classification in Bulgarian
%A Ralev, Radoslav
%A Pfeffer, Jürgen
%S Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022)
%D 2022
%8 September
%I Department of Computational Linguistics, IBL – BAS
%C Sofia, Bulgaria
%F ralev-pfeffer-2022-hate
%X In recent years, we have seen a surge in the propagation of online hate speech on social media platforms. According to a multitude of sources such as the European Council, hate speech can lead to acts of violence and conflict on a broader scale. That has led to increased awareness by governments, companies, and the scientific community, and although the field is relatively new, there have been considerable advancements in the field as a result of the collective effort. Despite the increasingly better results, most of the research focuses on the more popular languages (i.e., English, German, or Arabic), whereas less popular languages such as Bulgarian and other Balkan languages have been neglected. We have aggregated a real-world dataset from Bulgarian online forums and manually annotated 108,142 sentences. About 1.74% of which can be described with the categories racism, sexism, rudeness, and profanity. We then developed and evaluated various classifiers on the dataset and found that a support vector machine with a linear kernel trained on character-level TF-IDF features is the best model. Our work can be seen as another piece in the puzzle to building a strong foundation for future work on hate speech classification in Bulgarian.
%U https://aclanthology.org/2022.clib-1.5
%P 49-58
Markdown (Informal)
[Hate Speech Classification in Bulgarian](https://aclanthology.org/2022.clib-1.5) (Ralev & Pfeffer, CLIB 2022)
ACL
- Radoslav Ralev and Jürgen Pfeffer. 2022. Hate Speech Classification in Bulgarian. In Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022), pages 49–58, Sofia, Bulgaria. Department of Computational Linguistics, IBL – BAS.