% ACL Anthology record: Findings of EMNLP 2025 paper introducing the MarathiEmoExplain dataset.
% Case-sensitive words in the title are brace-protected as whole words so styles that
% sentence-case titles keep them intact.
@inproceedings{kumar-etal-2025-marathiemoexplain,
  title     = {{MarathiEmoExplain}: A Dataset for Sentiment, Emotion, and Explanation in Low-Resource {Marathi}},
  author    = {Kumar, Anuj and
               Sayed, Mohammed Faisal and
               Ahlawat, Satyadev and
               Prasad, Yamuna},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.712/},
  pages     = {13234--13243},
  isbn      = {979-8-89176-335-7},
  abstract  = {Marathi, the third most widely spoken language in India with over 83 million native speakers, remains significantly underrepresented in Natural Language Processing (NLP) research. While sentiment analysis has achieved substantial progress in high-resource languages such as English, Chinese, and Hindi, available Marathi datasets are limited to coarse sentiment labels and lack fine-grained emotional categorization or interpretability through explanations. To address this gap, we present a new annotated dataset of 10,762 Marathi sentences, each labeled with sentiment (positive, negative, or neutral), emotion (joy, anger, surprise, disgust, sadness, fear, or neutral), and a corresponding natural language justification. Justifications are written in English and generated using GPT-4 under a human-in-the-loop framework to ensure label fidelity and contextual alignment. Extensive experiments with both classical and transformer-based models demonstrate the effectiveness of the dataset for interpretable affective computing in a low-resource language setting, offering a benchmark for future research in multilingual and explainable NLP.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper (citekey kumar-etal-2025-marathiemoexplain); the host relatedItem describes the proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kumar-etal-2025-marathiemoexplain">
    <titleInfo>
      <title>MarathiEmoExplain: A Dataset for Sentiment, Emotion, and Explanation in Low-Resource Marathi</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Anuj</namePart>
      <namePart type="family">Kumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammed</namePart>
      <namePart type="given">Faisal</namePart>
      <namePart type="family">Sayed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Satyadev</namePart>
      <namePart type="family">Ahlawat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yamuna</namePart>
      <namePart type="family">Prasad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host volume: proceedings title, editors, publisher, place, ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>Marathi, the third most widely spoken language in India with over 83 million native speakers, remains significantly underrepresented in Natural Language Processing (NLP) research. While sentiment analysis has achieved substantial progress in high-resource languages such as English, Chinese, and Hindi, available Marathi datasets are limited to coarse sentiment labels and lack fine-grained emotional categorization or interpretability through explanations. To address this gap, we present a new annotated dataset of 10,762 Marathi sentences, each labeled with sentiment (positive, negative, or neutral), emotion (joy, anger, surprise, disgust, sadness, fear, or neutral), and a corresponding natural language justification. Justifications are written in English and generated using GPT-4 under a human-in-the-loop framework to ensure label fidelity and contextual alignment. Extensive experiments with both classical and transformer-based models demonstrate the effectiveness of the dataset for interpretable affective computing in a low-resource language setting, offering a benchmark for future research in multilingual and explainable NLP.</abstract>
    <identifier type="citekey">kumar-etal-2025-marathiemoexplain</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-emnlp.712/</url>
    </location>
    <!-- Issue date and page extent within the host volume -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>13234</start>
        <end>13243</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MarathiEmoExplain: A Dataset for Sentiment, Emotion, and Explanation in Low-Resource Marathi
%A Kumar, Anuj
%A Sayed, Mohammed Faisal
%A Ahlawat, Satyadev
%A Prasad, Yamuna
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F kumar-etal-2025-marathiemoexplain
%X Marathi, the third most widely spoken language in India with over 83 million native speakers, remains significantly underrepresented in Natural Language Processing (NLP) research. While sentiment analysis has achieved substantial progress in high-resource languages such as English, Chinese, and Hindi, available Marathi datasets are limited to coarse sentiment labels and lack fine-grained emotional categorization or interpretability through explanations. To address this gap, we present a new annotated dataset of 10,762 Marathi sentences, each labeled with sentiment (positive, negative, or neutral), emotion (joy, anger, surprise, disgust, sadness, fear, or neutral), and a corresponding natural language justification. Justifications are written in English and generated using GPT-4 under a human-in-the-loop framework to ensure label fidelity and contextual alignment. Extensive experiments with both classical and transformer-based models demonstrate the effectiveness of the dataset for interpretable affective computing in a low-resource language setting, offering a benchmark for future research in multilingual and explainable NLP.
%U https://aclanthology.org/2025.findings-emnlp.712/
%P 13234-13243
Markdown (Informal)
[MarathiEmoExplain: A Dataset for Sentiment, Emotion, and Explanation in Low-Resource Marathi](https://aclanthology.org/2025.findings-emnlp.712/) (Kumar et al., Findings 2025)
ACL