% Workshop paper record (ACL Anthology export, see the url field).
% Case-sensitive words in title/booktitle are protected with whole-word
% braces so sentence-casing styles keep ShahiEmotion, Punjabi, Shahmukhi
% and MeLLM intact.
@inproceedings{nawaz-etal-2026-shahiemotion,
  title     = {{ShahiEmotion}: A Benchmark Dataset for {Punjabi} {Shahmukhi} Emotion Detection},
  author    = {Nawaz, Usman and
               Iqbal, Muhammad Junaid and
               Alyas, Tahir and
               Asaf, Muhammad and
               Yaqoob, Shumayla and
               Raza, Usman Ahmed and
               Nadim, Muhammad Amin and
               Rafique, Aftab and
               Rehman, Faisal},
  editor    = {Huang, Kaiyu and
               Mo, Fengran and
               Chen, Pinzhen and
               Jiang, Meng},
  booktitle = {Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models ({MeLLM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.mellm-1.20/},
  pages     = {211--220},
  isbn      = {979-8-89176-430-9},
  abstract  = {Emotion detection is an important text classification task with applications in sentiment analysis, social media monitoring, human-computer interaction, and affective language understanding. However, Punjabi written in the Shahmukhi script remains severely under-resourced for emotion detection, with limited benchmark-style resources available for supervised evaluation. This paper introduces ShahiEmotion, a new Punjabi Shahmukhi emotion detection dataset containing 30379 sentence-level instances annotated with seven emotion categories: sadness, surprise, happiness, anger, neutral, fear, and disgust. The dataset is designed to support research in a low-resource setting characterized by script-specific challenges, lexical variation, and substantial class imbalance. We establish baseline results using several pretrained transformer-based models and formulate emotion detection as a sentence-level classification task. In particular, we fine-tune multilingual BERT, multilingual DistilBERT, XLM-RoBERTa, and Urdu RoBERTa under the same training and evaluation setting using standard cross-entropy loss. Experimental results show that XLM-RoBERTa provides the strongest overall performance among the compared models. The best model achieves 77.95{\%} accuracy, 58.47{\%} macro-F1, and 77.60{\%} weighted-F1 on the test set. The dataset, evaluation protocol, and baseline results introduced in this work are intended to support future research on Punjabi Shahmukhi emotion analysis and low-resource NLP.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey nawaz-etal-2026-shahiemotion: the paper's own
     metadata, followed by the host proceedings in <relatedItem type="host">. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nawaz-etal-2026-shahiemotion">
    <titleInfo>
      <title>ShahiEmotion: A Benchmark Dataset for Punjabi Shahmukhi Emotion Detection</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Usman</namePart>
      <namePart type="family">Nawaz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="given">Junaid</namePart>
      <namePart type="family">Iqbal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tahir</namePart>
      <namePart type="family">Alyas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="family">Asaf</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shumayla</namePart>
      <namePart type="family">Yaqoob</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Usman</namePart>
      <namePart type="given">Ahmed</namePart>
      <namePart type="family">Raza</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muhammad</namePart>
      <namePart type="given">Amin</namePart>
      <namePart type="family">Nadim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aftab</namePart>
      <namePart type="family">Rafique</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Faisal</namePart>
      <namePart type="family">Rehman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host proceedings volume: title, editors, publisher, place, ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kaiyu</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fengran</namePart>
        <namePart type="family">Mo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pinzhen</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Meng</namePart>
        <namePart type="family">Jiang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-430-9</identifier>
    </relatedItem>
    <abstract>Emotion detection is an important text classification task with applications in sentiment analysis, social media monitoring, human-computer interaction, and affective language understanding. However, Punjabi written in the Shahmukhi script remains severely under-resourced for emotion detection, with limited benchmark-style resources available for supervised evaluation. This paper introduces ShahiEmotion, a new Punjabi Shahmukhi emotion detection dataset containing 30379 sentence-level instances annotated with seven emotion categories: sadness, surprise, happiness, anger, neutral, fear, and disgust. The dataset is designed to support research in a low-resource setting characterized by script-specific challenges, lexical variation, and substantial class imbalance. We establish baseline results using several pretrained transformer-based models and formulate emotion detection as a sentence-level classification task. In particular, we fine-tune multilingual BERT, multilingual DistilBERT, XLM-RoBERTa, and Urdu RoBERTa under the same training and evaluation setting using standard cross-entropy loss. Experimental results show that XLM-RoBERTa provides the strongest overall performance among the compared models. The best model achieves 77.95% accuracy, 58.47% macro-F1, and 77.60% weighted-F1 on the test set. The dataset, evaluation protocol, and baseline results introduced in this work are intended to support future research on Punjabi Shahmukhi emotion analysis and low-resource NLP.</abstract>
    <identifier type="citekey">nawaz-etal-2026-shahiemotion</identifier>
    <location>
      <url>https://aclanthology.org/2026.mellm-1.20/</url>
    </location>
    <!-- Position of the paper inside the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>211</start>
        <end>220</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ShahiEmotion: A Benchmark Dataset for Punjabi Shahmukhi Emotion Detection
%A Nawaz, Usman
%A Iqbal, Muhammad Junaid
%A Alyas, Tahir
%A Asaf, Muhammad
%A Yaqoob, Shumayla
%A Raza, Usman Ahmed
%A Nadim, Muhammad Amin
%A Rafique, Aftab
%A Rehman, Faisal
%Y Huang, Kaiyu
%Y Mo, Fengran
%Y Chen, Pinzhen
%Y Jiang, Meng
%S Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-430-9
%F nawaz-etal-2026-shahiemotion
%X Emotion detection is an important text classification task with applications in sentiment analysis, social media monitoring, human-computer interaction, and affective language understanding. However, Punjabi written in the Shahmukhi script remains severely under-resourced for emotion detection, with limited benchmark-style resources available for supervised evaluation. This paper introduces ShahiEmotion, a new Punjabi Shahmukhi emotion detection dataset containing 30379 sentence-level instances annotated with seven emotion categories: sadness, surprise, happiness, anger, neutral, fear, and disgust. The dataset is designed to support research in a low-resource setting characterized by script-specific challenges, lexical variation, and substantial class imbalance. We establish baseline results using several pretrained transformer-based models and formulate emotion detection as a sentence-level classification task. In particular, we fine-tune multilingual BERT, multilingual DistilBERT, XLM-RoBERTa, and Urdu RoBERTa under the same training and evaluation setting using standard cross-entropy loss. Experimental results show that XLM-RoBERTa provides the strongest overall performance among the compared models. The best model achieves 77.95% accuracy, 58.47% macro-F1, and 77.60% weighted-F1 on the test set. The dataset, evaluation protocol, and baseline results introduced in this work are intended to support future research on Punjabi Shahmukhi emotion analysis and low-resource NLP.
%U https://aclanthology.org/2026.mellm-1.20/
%P 211-220
Markdown (Informal)
[ShahiEmotion: A Benchmark Dataset for Punjabi Shahmukhi Emotion Detection](https://aclanthology.org/2026.mellm-1.20/) (Nawaz et al., MeLLM 2026)
ACL
- Usman Nawaz, Muhammad Junaid Iqbal, Tahir Alyas, Muhammad Asaf, Shumayla Yaqoob, Usman Ahmed Raza, Muhammad Amin Nadim, Aftab Rafique, and Faisal Rehman. 2026. ShahiEmotion: A Benchmark Dataset for Punjabi Shahmukhi Emotion Detection. In Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026), pages 211–220, San Diego, United States. Association for Computational Linguistics.