@inproceedings{soykan-etal-2022-comparison,
title = "A Comparison of Machine Learning Techniques for {T}urkish Profanity Detection",
author = "Soykan, Levent and
Karsak, Cihan and
Durgar Elkahlout, Ilknur and
Aytan, Burak",
editor = "Monti, Johanna and
Basile, Valerio and
Di Buono, Maria Pia and
Manna, Raffaele and
Pascucci, Antonio and
Tonelli, Sara",
booktitle = "Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.restup-1.3",
pages = "16--24",
abstract = "Profanity detection became an important task with the increase of social media usage. Most of the users prefer a clean and profanity free environment to communicate with others. In order to provide a such environment for the users, service providers are using various profanity detection tools. In this paper, we researched on Turkish profanity detection in our search engine. We collected and labeled a dataset from search engine queries as one of the two classes: profane and not-profane. We experimented with several classical machine learning and deep learning methods and compared methods in means of speed and accuracy. We performed our best scores with transformer based Electra model with 0.93 F1 Score. We also compared our models with the state-of-the-art Turkish profanity detection tool and observed that we outperform it from all aspects.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="soykan-etal-2022-comparison">
<titleInfo>
<title>A Comparison of Machine Learning Techniques for Turkish Profanity Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Levent</namePart>
<namePart type="family">Soykan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cihan</namePart>
<namePart type="family">Karsak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilknur</namePart>
<namePart type="family">Durgar Elkahlout</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burak</namePart>
<namePart type="family">Aytan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">Di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raffaele</namePart>
<namePart type="family">Manna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Pascucci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Profanity detection became an important task with the increase of social media usage. Most of the users prefer a clean and profanity free environment to communicate with others. In order to provide a such environment for the users, service providers are using various profanity detection tools. In this paper, we researched on Turkish profanity detection in our search engine. We collected and labeled a dataset from search engine queries as one of the two classes: profane and not-profane. We experimented with several classical machine learning and deep learning methods and compared methods in means of speed and accuracy. We performed our best scores with transformer based Electra model with 0.93 F1 Score. We also compared our models with the state-of-the-art Turkish profanity detection tool and observed that we outperform it from all aspects.</abstract>
<identifier type="citekey">soykan-etal-2022-comparison</identifier>
<location>
<url>https://aclanthology.org/2022.restup-1.3</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>16</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparison of Machine Learning Techniques for Turkish Profanity Detection
%A Soykan, Levent
%A Karsak, Cihan
%A Durgar Elkahlout, Ilknur
%A Aytan, Burak
%Y Monti, Johanna
%Y Basile, Valerio
%Y Di Buono, Maria Pia
%Y Manna, Raffaele
%Y Pascucci, Antonio
%Y Tonelli, Sara
%S Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language Analysis
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F soykan-etal-2022-comparison
%X Profanity detection became an important task with the increase of social media usage. Most of the users prefer a clean and profanity free environment to communicate with others. In order to provide a such environment for the users, service providers are using various profanity detection tools. In this paper, we researched on Turkish profanity detection in our search engine. We collected and labeled a dataset from search engine queries as one of the two classes: profane and not-profane. We experimented with several classical machine learning and deep learning methods and compared methods in means of speed and accuracy. We performed our best scores with transformer based Electra model with 0.93 F1 Score. We also compared our models with the state-of-the-art Turkish profanity detection tool and observed that we outperform it from all aspects.
%U https://aclanthology.org/2022.restup-1.3
%P 16-24
Markdown (Informal)
[A Comparison of Machine Learning Techniques for Turkish Profanity Detection](https://aclanthology.org/2022.restup-1.3) (Soykan et al., ResTUP 2022)
ACL