@inproceedings{ihtiyar-etal-2023-dataset,
title = "A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets",
author = {{\.I}htiyar, Musa and
{\"O}zdemir, {\"O}mer and
Ereng{\"u}l, Mustafa and
{\"O}zg{\"u}r, Arzucan},
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.103",
doi = "10.18653/v1/2023.findings-emnlp.103",
pages = "1543--1549",
abstract = "Offensive language detection is crucial in natural language processing (NLP). We investigated the importance of context for detecting such language in reply tweets on Twitter, where the use of offensive language is widespread. We collected a Turkish tweet dataset where the target group was unvaccinated people during the Covid period. Tweets in the dataset were enriched with contextual information by adding the original tweet to which a particular tweet was posted as a reply. The dataset, which includes over 28,000 tweet-reply pairs, was manually labeled by human annotators and made publicly available. In addition, we compared the performance of different machine learning models with and without contextual information. Our results show that this type of contextual information was not very useful in improving the performance of the models in general, although it slightly increased the macro-averaged F1-score of certain models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ihtiyar-etal-2023-dataset">
<titleInfo>
<title>A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Musa</namePart>
<namePart type="family">İhtiyar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ömer</namePart>
<namePart type="family">Özdemir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Erengül</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arzucan</namePart>
<namePart type="family">Özgür</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Offensive language detection is crucial in natural language processing (NLP). We investigated the importance of context for detecting such language in reply tweets on Twitter, where the use of offensive language is widespread. We collected a Turkish tweet dataset where the target group was unvaccinated people during the Covid period. Tweets in the dataset were enriched with contextual information by adding the original tweet to which a particular tweet was posted as a reply. The dataset, which includes over 28,000 tweet-reply pairs, was manually labeled by human annotators and made publicly available. In addition, we compared the performance of different machine learning models with and without contextual information. Our results show that this type of contextual information was not very useful in improving the performance of the models in general, although it slightly increased the macro-averaged F1-score of certain models.</abstract>
<identifier type="citekey">ihtiyar-etal-2023-dataset</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.103</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.103</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>1543</start>
<end>1549</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets
%A İhtiyar, Musa
%A Özdemir, Ömer
%A Erengül, Mustafa
%A Özgür, Arzucan
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F ihtiyar-etal-2023-dataset
%X Offensive language detection is crucial in natural language processing (NLP). We investigated the importance of context for detecting such language in reply tweets on Twitter, where the use of offensive language is widespread. We collected a Turkish tweet dataset where the target group was unvaccinated people during the Covid period. Tweets in the dataset were enriched with contextual information by adding the original tweet to which a particular tweet was posted as a reply. The dataset, which includes over 28,000 tweet-reply pairs, was manually labeled by human annotators and made publicly available. In addition, we compared the performance of different machine learning models with and without contextual information. Our results show that this type of contextual information was not very useful in improving the performance of the models in general, although it slightly increased the macro-averaged F1-score of certain models.
%R 10.18653/v1/2023.findings-emnlp.103
%U https://aclanthology.org/2023.findings-emnlp.103
%U https://doi.org/10.18653/v1/2023.findings-emnlp.103
%P 1543-1549
Markdown (Informal)
[A Dataset for Investigating the Impact of Context for Offensive Language Detection in Tweets](https://aclanthology.org/2023.findings-emnlp.103) (İhtiyar et al., Findings 2023)
ACL