@inproceedings{thevakumar-etal-2025-emota,
title = "{E}mo{T}a: A {T}amil Emotional Speech Dataset",
author = "Thevakumar, Jubeerathan and
Thavarasa, Luxshan and
Sivatheepan, Thanikan and
Kugarajah, Sajeev and
Thayasivam, Uthayasanker",
editor = "Sarveswaran, Kengatharaiyer and
Vaidya, Ashwini and
Krishna Bal, Bal and
Shams, Sana and
Thapa, Surendrabikram",
booktitle = "Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.chipsal-1.19/",
pages = "193--201",
abstract = "This paper introduces EmoTa, the first emotional speech dataset in Tamil, designed to reflect the linguistic diversity of Sri Lankan Tamil speakers. EmoTa comprises 936 recorded utterances from 22 native Tamil speakers (11 male, 11 female), each articulating 19 semantically neutral sentences across five primary emotions: anger, happiness, sadness, fear, and neutrality. To ensure quality, inter-annotator agreement was assessed using Fleiss' Kappa, resulting in a substantial agreement score of 0.74. Initial evaluations using machine learning models, including XGBoost and Random Forest, yielded a high F1-score of 0.91 and 0.90 for emotion classification tasks. By releasing EmoTa, we aim to encourage further exploration of Tamil language processing and the development of innovative models for Tamil Speech Emotion Recognition."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thevakumar-etal-2025-emota">
<titleInfo>
<title>EmoTa: A Tamil Emotional Speech Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jubeerathan</namePart>
<namePart type="family">Thevakumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luxshan</namePart>
<namePart type="family">Thavarasa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thanikan</namePart>
<namePart type="family">Sivatheepan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeev</namePart>
<namePart type="family">Kugarajah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Uthayasanker</namePart>
<namePart type="family">Thayasivam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashwini</namePart>
<namePart type="family">Vaidya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="family">Krishna Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sana</namePart>
<namePart type="family">Shams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces EmoTa, the first emotional speech dataset in Tamil, designed to reflect the linguistic diversity of Sri Lankan Tamil speakers. EmoTa comprises 936 recorded utterances from 22 native Tamil speakers (11 male, 11 female), each articulating 19 semantically neutral sentences across five primary emotions: anger, happiness, sadness, fear, and neutrality. To ensure quality, inter-annotator agreement was assessed using Fleiss’ Kappa, resulting in a substantial agreement score of 0.74. Initial evaluations using machine learning models, including XGBoost and Random Forest, yielded a high F1-score of 0.91 and 0.90 for emotion classification tasks. By releasing EmoTa, we aim to encourage further exploration of Tamil language processing and the development of innovative models for Tamil Speech Emotion Recognition.</abstract>
<identifier type="citekey">thevakumar-etal-2025-emota</identifier>
<location>
<url>https://aclanthology.org/2025.chipsal-1.19/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>193</start>
<end>201</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EmoTa: A Tamil Emotional Speech Dataset
%A Thevakumar, Jubeerathan
%A Thavarasa, Luxshan
%A Sivatheepan, Thanikan
%A Kugarajah, Sajeev
%A Thayasivam, Uthayasanker
%Y Sarveswaran, Kengatharaiyer
%Y Vaidya, Ashwini
%Y Krishna Bal, Bal
%Y Shams, Sana
%Y Thapa, Surendrabikram
%S Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F thevakumar-etal-2025-emota
%X This paper introduces EmoTa, the first emotional speech dataset in Tamil, designed to reflect the linguistic diversity of Sri Lankan Tamil speakers. EmoTa comprises 936 recorded utterances from 22 native Tamil speakers (11 male, 11 female), each articulating 19 semantically neutral sentences across five primary emotions: anger, happiness, sadness, fear, and neutrality. To ensure quality, inter-annotator agreement was assessed using Fleiss’ Kappa, resulting in a substantial agreement score of 0.74. Initial evaluations using machine learning models, including XGBoost and Random Forest, yielded a high F1-score of 0.91 and 0.90 for emotion classification tasks. By releasing EmoTa, we aim to encourage further exploration of Tamil language processing and the development of innovative models for Tamil Speech Emotion Recognition.
%U https://aclanthology.org/2025.chipsal-1.19/
%P 193-201
Markdown (Informal)
[EmoTa: A Tamil Emotional Speech Dataset](https://aclanthology.org/2025.chipsal-1.19/) (Thevakumar et al., CHiPSAL 2025)
ACL
- Jubeerathan Thevakumar, Luxshan Thavarasa, Thanikan Sivatheepan, Sajeev Kugarajah, and Uthayasanker Thayasivam. 2025. EmoTa: A Tamil Emotional Speech Dataset. In Proceedings of the First Workshop on Challenges in Processing South Asian Languages (CHiPSAL 2025), pages 193–201, Abu Dhabi, UAE. International Committee on Computational Linguistics.