@inproceedings{rooein-etal-2026-exploring,
title = "Exploring Subjective Tasks in {F}arsi: A Survey Analysis and Evaluation of Language Model",
author = "Rooein, Donya and
Plaza-del-Arco, Flor Miriam and
Nozza, Debora and
Hovy, Dirk",
editor = "Barnes, Jeremy and
Barriere, Valentin and
De Clercq, Orph{\'e}e and
Klinger, Roman and
Nouri, C{\'e}lia and
Nozza, Debora and
Singh, Pranaydeep",
booktitle = "The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment {\&} Social Media Analysis ({WASSA} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.wassa-1.8/",
pages = "83--95",
ISBN = "979-8-89176-378-4",
abstract = "Given Farsi{'}s speaker base of over 127 million people and the growing availability of digital text, including more than 1.3 million articles on Wikipedia, it is considered a middle-resource language. However, this label quickly crumbles when the situation is examined more closely. We focus on three subjective tasks (Sentiment Analysis, Emotion Analysis, and Toxicity Detection) and identify significant challenges in data availability and quality, despite overall increases in data availability. We review 110 publications on subjective tasks in Farsi and observe a lack of publicly available datasets. Furthermore, existing datasets often lack essential demographic factors, such as age and gender, that are crucial for accurately modeling subjectivity in language. When evaluating prediction models using the few available datasets, the results are highly unstable across both datasets and models. Our findings show that the volume of data alone is insufficient to improve a language{'}s standing in NLP."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rooein-etal-2026-exploring">
<titleInfo>
<title>Exploring Subjective Tasks in Farsi: A Survey Analysis and Evaluation of Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donya</namePart>
<namePart type="family">Rooein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flor</namePart>
<namePart type="given">Miriam</namePart>
<namePart type="family">Plaza-del-Arco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment &amp; Social Media Analysis (WASSA 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jeremy</namePart>
<namePart type="family">Barnes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Barriere</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orphée</namePart>
<namePart type="family">De Clercq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Klinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Célia</namePart>
<namePart type="family">Nouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pranaydeep</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-378-4</identifier>
</relatedItem>
<abstract>Given Farsi’s speaker base of over 127 million people and the growing availability of digital text, including more than 1.3 million articles on Wikipedia, it is considered a middle-resource language. However, this label quickly crumbles when the situation is examined more closely. We focus on three subjective tasks (Sentiment Analysis, Emotion Analysis, and Toxicity Detection) and identify significant challenges in data availability and quality, despite overall increases in data availability. We review 110 publications on subjective tasks in Farsi and observe a lack of publicly available datasets. Furthermore, existing datasets often lack essential demographic factors, such as age and gender, that are crucial for accurately modeling subjectivity in language. When evaluating prediction models using the few available datasets, the results are highly unstable across both datasets and models. Our findings show that the volume of data alone is insufficient to improve a language’s standing in NLP.</abstract>
<identifier type="citekey">rooein-etal-2026-exploring</identifier>
<location>
<url>https://aclanthology.org/2026.wassa-1.8/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>83</start>
<end>95</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Subjective Tasks in Farsi: A Survey Analysis and Evaluation of Language Model
%A Rooein, Donya
%A Plaza-del-Arco, Flor Miriam
%A Nozza, Debora
%A Hovy, Dirk
%Y Barnes, Jeremy
%Y Barriere, Valentin
%Y De Clercq, Orphée
%Y Klinger, Roman
%Y Nouri, Célia
%Y Nozza, Debora
%Y Singh, Pranaydeep
%S The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment & Social Media Analysis (WASSA 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-378-4
%F rooein-etal-2026-exploring
%X Given Farsi’s speaker base of over 127 million people and the growing availability of digital text, including more than 1.3 million articles on Wikipedia, it is considered a middle-resource language. However, this label quickly crumbles when the situation is examined more closely. We focus on three subjective tasks (Sentiment Analysis, Emotion Analysis, and Toxicity Detection) and identify significant challenges in data availability and quality, despite overall increases in data availability. We review 110 publications on subjective tasks in Farsi and observe a lack of publicly available datasets. Furthermore, existing datasets often lack essential demographic factors, such as age and gender, that are crucial for accurately modeling subjectivity in language. When evaluating prediction models using the few available datasets, the results are highly unstable across both datasets and models. Our findings show that the volume of data alone is insufficient to improve a language’s standing in NLP.
%U https://aclanthology.org/2026.wassa-1.8/
%P 83-95
Markdown (Informal)
[Exploring Subjective Tasks in Farsi: A Survey Analysis and Evaluation of Language Model](https://aclanthology.org/2026.wassa-1.8/) (Rooein et al., WASSA 2026)
ACL