% Dewangan et al., COLING 2025 (ACL Anthology record 2025.coling-main.391).
% Editor "Di Eugenio" is a compound surname, so both words sit before the comma.
@inproceedings{dewangan-etal-2025-benchmark,
  title     = {Benchmark Creation for Aspect-Based Sentiment Analysis in Low-Resource {Odia} Language and Evaluation through Fine-Tuning of Multilingual Models},
  author    = {Dewangan, Lipika and
               Sayeed, Zoyah Afsheen and
               Maurya, Chandresh},
  editor    = {Rambow, Owen and
               Wanner, Leo and
               Apidianaki, Marianna and
               Al-Khalifa, Hend and
               Di Eugenio, Barbara and
               Schockaert, Steven},
  booktitle = {Proceedings of the 31st International Conference on Computational Linguistics},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.coling-main.391/},
  pages     = {5863--5869},
  abstract  = {The rapid growth of online product reviews spurs significant interest in Aspect-Based Sentiment Analysis (ABSA), which involves identifying aspect terms and their associated sentiment polarity. While ABSA is widely studied in resource-rich languages like English, Chinese, and Spanish, it remains underexplored in low-resource languages such as Odia. To address this gap, we create a reliable resource for aspect-based sentiment analysis in Odia. The dataset is annotated for two specific tasks: Aspect Term Extraction (ATE) and Aspect Polarity Classification (APC), spanning seven domains and aligned with the SemEval-2014 benchmark. Furthermore, we employ an ensemble data augmentation approach combining back-translation with a fine-tuned T5 paraphrase generation model to enhance the dataset and apply a semantic similarity filter using a Universal Sentence Encoder (USE) to remove low-quality data and ensure a balanced distribution of sample difficulty in the newly augmented dataset. Finally, we validate our dataset by fine-tuning multilingual pre-trained models, XLM-R and IndicBERT, on ATE and APC tasks. Additionally, we use three classical baseline models to evaluate the quality of the proposed dataset for these tasks. We hope the Odia dataset will spur more work for the ABSA task.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey dewangan-etal-2025-benchmark. The top-level
     names are the paper's authors; the host relatedItem describes the
     proceedings volume, its editors, publisher and place. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dewangan-etal-2025-benchmark">
    <titleInfo>
      <title>Benchmark Creation for Aspect-Based Sentiment Analysis in Low-Resource Odia Language and Evaluation through Fine-Tuning of Multilingual Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Lipika</namePart>
      <namePart type="family">Dewangan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zoyah</namePart>
      <namePart type="given">Afsheen</namePart>
      <namePart type="family">Sayeed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chandresh</namePart>
      <namePart type="family">Maurya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <!-- Compound family name: "Di" is part of the surname, not a given name. -->
        <namePart type="given">Barbara</namePart>
        <namePart type="family">Di Eugenio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Schockaert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The rapid growth of online product reviews spurs significant interest in Aspect-Based Sentiment Analysis (ABSA), which involves identifying aspect terms and their associated sentiment polarity. While ABSA is widely studied in resource-rich languages like English, Chinese, and Spanish, it remains underexplored in low-resource languages such as Odia. To address this gap, we create a reliable resource for aspect-based sentiment analysis in Odia. The dataset is annotated for two specific tasks: Aspect Term Extraction (ATE) and Aspect Polarity Classification (APC), spanning seven domains and aligned with the SemEval-2014 benchmark. Furthermore, we employ an ensemble data augmentation approach combining back-translation with a fine-tuned T5 paraphrase generation model to enhance the dataset and apply a semantic similarity filter using a Universal Sentence Encoder (USE) to remove low-quality data and ensure a balanced distribution of sample difficulty in the newly augmented dataset. Finally, we validate our dataset by fine-tuning multilingual pre-trained models, XLM-R and IndicBERT, on ATE and APC tasks. Additionally, we use three classical baseline models to evaluate the quality of the proposed dataset for these tasks. We hope the Odia dataset will spur more work for the ABSA task.</abstract>
    <identifier type="citekey">dewangan-etal-2025-benchmark</identifier>
    <location>
      <url>https://aclanthology.org/2025.coling-main.391/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>5863</start>
        <end>5869</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmark Creation for Aspect-Based Sentiment Analysis in Low-Resource Odia Language and Evaluation through Fine-Tuning of Multilingual Models
%A Dewangan, Lipika
%A Sayeed, Zoyah Afsheen
%A Maurya, Chandresh
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F dewangan-etal-2025-benchmark
%X The rapid growth of online product reviews spurs significant interest in Aspect-Based Sentiment Analysis (ABSA), which involves identifying aspect terms and their associated sentiment polarity. While ABSA is widely studied in resource-rich languages like English, Chinese, and Spanish, it remains underexplored in low-resource languages such as Odia. To address this gap, we create a reliable resource for aspect-based sentiment analysis in Odia. The dataset is annotated for two specific tasks: Aspect Term Extraction (ATE) and Aspect Polarity Classification (APC), spanning seven domains and aligned with the SemEval-2014 benchmark. Furthermore, we employ an ensemble data augmentation approach combining back-translation with a fine-tuned T5 paraphrase generation model to enhance the dataset and apply a semantic similarity filter using a Universal Sentence Encoder (USE) to remove low-quality data and ensure a balanced distribution of sample difficulty in the newly augmented dataset. Finally, we validate our dataset by fine-tuning multilingual pre-trained models, XLM-R and IndicBERT, on ATE and APC tasks. Additionally, we use three classical baseline models to evaluate the quality of the proposed dataset for these tasks. We hope the Odia dataset will spur more work for the ABSA task.
%U https://aclanthology.org/2025.coling-main.391/
%P 5863-5869
Markdown (Informal)
[Benchmark Creation for Aspect-Based Sentiment Analysis in Low-Resource Odia Language and Evaluation through Fine-Tuning of Multilingual Models](https://aclanthology.org/2025.coling-main.391/) (Dewangan et al., COLING 2025)
ACL