@inproceedings{balaga-etal-2025-power,
title = "Power doesn{'}t reside in size: A Low Parameter Hybrid Language Model ({HLM}) for Sentiment Analysis in Code-mixed data",
author = "Balaga, Pavan Sai and
Karthik, Nagasamudram and
Vishwanath, Challa and
Sharma, Raksha and
Murthy, Rudra and
Mittal, Ashish",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.749/",
pages = "14819--14827",
ISBN = "979-8-89176-332-6",
    abstract = "Code-mixed text{---}where multiple languages are used within the same utterance{---}is increasingly common in both spoken and written communication. However, it presents significant challenges for machine learning models due to the interplay of distinct grammatical structures, effectively forming a hybrid language. While fine-tuning large language models (LLMs) such as GPT-3 or Llama-3 on code-mixed data has led to performance improvements, these models still lag behind their monolingual counterparts and incur high computational costs due to the large number of trainable parameters. In this paper, we focus on the task of sentiment detection in code-mixed text and propose a Hybrid Language Model (HLM) that combines a multilingual encoder (e.g., mBERT) with a lightweight decoder (e.g., Sarvam-1, 3B parameters). Despite having significantly fewer trainable parameters, HLM achieves sentiment classification performance comparable to that of fine-tuned LLMs ({\ensuremath{>}} 7B parameters). Furthermore, our results demonstrate that HLM significantly outperforms models trained individually, underscoring its effectiveness for low-resource, code-mixed sentiment analysis."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="balaga-etal-2025-power">
<titleInfo>
<title>Power doesn’t reside in size: A Low Parameter Hybrid Language Model (HLM) for Sentiment Analysis in Code-mixed data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pavan</namePart>
<namePart type="given">Sai</namePart>
<namePart type="family">Balaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nagasamudram</namePart>
<namePart type="family">Karthik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Challa</namePart>
<namePart type="family">Vishwanath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raksha</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rudra</namePart>
<namePart type="family">Murthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Code-mixed text—where multiple languages are used within the same utterance—is increasingly common in both spoken and written communication. However, it presents significant challenges for machine learning models due to the interplay of distinct grammatical structures, effectively forming a hybrid language. While fine-tuning large language models (LLMs) such as GPT-3 or Llama-3 on code-mixed data has led to performance improvements, these models still lag behind their monolingual counterparts and incur high computational costs due to the large number of trainable parameters. In this paper, we focus on the task of sentiment detection in code-mixed text and propose a Hybrid Language Model (HLM) that combines a multilingual encoder (e.g., mBERT) with a lightweight decoder (e.g., Sarvam-1, 3B parameters). Despite having significantly fewer trainable parameters, HLM achieves sentiment classification performance comparable to that of fine-tuned LLMs (&gt; 7B parameters). Furthermore, our results demonstrate that HLM significantly outperforms models trained individually, underscoring its effectiveness for low-resource, code-mixed sentiment analysis.</abstract>
<identifier type="citekey">balaga-etal-2025-power</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.749/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>14819</start>
<end>14827</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Power doesn’t reside in size: A Low Parameter Hybrid Language Model (HLM) for Sentiment Analysis in Code-mixed data
%A Balaga, Pavan Sai
%A Karthik, Nagasamudram
%A Vishwanath, Challa
%A Sharma, Raksha
%A Murthy, Rudra
%A Mittal, Ashish
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F balaga-etal-2025-power
%X Code-mixed text—where multiple languages are used within the same utterance—is increasingly common in both spoken and written communication. However, it presents significant challenges for machine learning models due to the interplay of distinct grammatical structures, effectively forming a hybrid language. While fine-tuning large language models (LLMs) such as GPT-3 or Llama-3 on code-mixed data has led to performance improvements, these models still lag behind their monolingual counterparts and incur high computational costs due to the large number of trainable parameters. In this paper, we focus on the task of sentiment detection in code-mixed text and propose a Hybrid Language Model (HLM) that combines a multilingual encoder (e.g., mBERT) with a lightweight decoder (e.g., Sarvam-1, 3B parameters). Despite having significantly fewer trainable parameters, HLM achieves sentiment classification performance comparable to that of fine-tuned LLMs (> 7B parameters). Furthermore, our results demonstrate that HLM significantly outperforms models trained individually, underscoring its effectiveness for low-resource, code-mixed sentiment analysis.
%U https://aclanthology.org/2025.emnlp-main.749/
%P 14819-14827
Markdown (Informal)
[Power doesn’t reside in size: A Low Parameter Hybrid Language Model (HLM) for Sentiment Analysis in Code-mixed data](https://aclanthology.org/2025.emnlp-main.749/) (Balaga et al., EMNLP 2025)
ACL
Pavan Sai Balaga, Nagasamudram Karthik, Challa Vishwanath, Raksha Sharma, Rudra Murthy, and Ashish Mittal. 2025. Power doesn't reside in size: A Low Parameter Hybrid Language Model (HLM) for Sentiment Analysis in Code-mixed data. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 14819–14827, Suzhou, China. Association for Computational Linguistics.
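
The abstract describes HLM as a multilingual encoder (e.g., mBERT) coupled with a lightweight decoder (e.g., Sarvam-1) for code-mixed sentiment classification, but this citation page gives no implementation details. The snippet below is only a minimal PyTorch sketch of one way such a hybrid could be wired up; the Hugging Face model identifiers, the projection-based fusion of encoder states into the decoder, and the mean-pooled classification head are assumptions for illustration, not the authors' method.

```python
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer

class HybridSentimentClassifier(nn.Module):
    """Illustrative encoder-decoder hybrid for code-mixed sentiment (not the paper's code)."""

    def __init__(self,
                 encoder_name="bert-base-multilingual-cased",  # mBERT
                 decoder_name="sarvamai/sarvam-1",             # assumed HF id for Sarvam-1
                 num_labels=3):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(encoder_name)
        # Load the decoder backbone without its language-modeling head.
        self.decoder = AutoModel.from_pretrained(decoder_name)
        # Project encoder hidden states into the decoder's embedding space.
        self.proj = nn.Linear(self.encoder.config.hidden_size,
                              self.decoder.config.hidden_size)
        self.classifier = nn.Linear(self.decoder.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        enc_out = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        # Feed projected encoder states to the decoder as input embeddings
        # (one possible fusion scheme; the paper may fuse the two models differently).
        dec_out = self.decoder(inputs_embeds=self.proj(enc_out.last_hidden_state),
                               attention_mask=attention_mask)
        # Mask-aware mean pooling over decoder states, then a sentiment head.
        mask = attention_mask.unsqueeze(-1).float()
        pooled = (dec_out.last_hidden_state * mask).sum(1) / mask.sum(1).clamp(min=1e-9)
        return self.classifier(pooled)


# Usage sketch with the mBERT tokenizer; a real pipeline would also need to
# handle the decoder's tokenizer/vocabulary consistently.
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
batch = tokenizer(["movie mast tha, totally worth it!"], return_tensors="pt", padding=True)
model = HybridSentimentClassifier()
logits = model(batch["input_ids"], batch["attention_mask"])
print(logits.shape)  # (1, num_labels)
```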