@inproceedings{bokaei-etal-2026-benchmarking,
title = "Benchmarking Offensive Language Detection in {P}ersian and {P}ashto",
author = "Bokaei, Zahra and
Webber, Bonnie and
Magdy, Walid",
editor = "Merchant, Rayyan and
Megerdoomian, Karine",
booktitle = "The Proceedings of the First Workshop on {NLP} and {LLM}s for the {I}ranian Language Family",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.silkroadnlp-1.2/",
pages = "13--23",
ISBN = "979-8-89176-371-5",
abstract = "Offensive language detection and target identification are essential for maintaining respectful online environments. While these tasks have been widely studied for English, comparatively less attention has been given to other languages, including Persian and Pashto, and the effectiveness of recent large language models for these languages remains underexplored. To address this gap, we created a comprehensive benchmark of diverse modeling approaches in Persian and Pashto. Our evaluation covers zero-shot, fine-tuned, and cross-lingual transfer settings, analyzing when detection succeeds or fails across different model approaches. This study provides one of the first systematic analyses of offensive language detection and cross-lingual transfer between these languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bokaei-etal-2026-benchmarking">
<titleInfo>
<title>Benchmarking Offensive Language Detection in Persian and Pashto</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zahra</namePart>
<namePart type="family">Bokaei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rayyan</namePart>
<namePart type="family">Merchant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karine</namePart>
<namePart type="family">Megerdoomian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-371-5</identifier>
</relatedItem>
<abstract>Offensive language detection and target identification are essential for maintaining respectful online environments. While these tasks have been widely studied for English, comparatively less attention has been given to other languages, including Persian and Pashto, and the effectiveness of recent large language models for these languages remains underexplored. To address this gap, we created a comprehensive benchmark of diverse modeling approaches in Persian and Pashto. Our evaluation covers zero-shot, fine-tuned, and cross-lingual transfer settings, analyzing when detection succeeds or fails across different model approaches. This study provides one of the first systematic analyses of offensive language detection and cross-lingual transfer between these languages.</abstract>
<identifier type="citekey">bokaei-etal-2026-benchmarking</identifier>
<location>
<url>https://aclanthology.org/2026.silkroadnlp-1.2/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>13</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking Offensive Language Detection in Persian and Pashto
%A Bokaei, Zahra
%A Webber, Bonnie
%A Magdy, Walid
%Y Merchant, Rayyan
%Y Megerdoomian, Karine
%S The Proceedings of the First Workshop on NLP and LLMs for the Iranian Language Family
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-371-5
%F bokaei-etal-2026-benchmarking
%X Offensive language detection and target identification are essential for maintaining respectful online environments. While these tasks have been widely studied for English, comparatively less attention has been given to other languages, including Persian and Pashto, and the effectiveness of recent large language models for these languages remains underexplored. To address this gap, we created a comprehensive benchmark of diverse modeling approaches in Persian and Pashto. Our evaluation covers zero-shot, fine-tuned, and cross-lingual transfer settings, analyzing when detection succeeds or fails across different model approaches. This study provides one of the first systematic analyses of offensive language detection and cross-lingual transfer between these languages.
%U https://aclanthology.org/2026.silkroadnlp-1.2/
%P 13-23
Markdown (Informal)
[Benchmarking Offensive Language Detection in Persian and Pashto](https://aclanthology.org/2026.silkroadnlp-1.2/) (Bokaei et al., SilkRoadNLP 2026)
ACL