% ACL Anthology record for the DataBees system description paper at SemEval-2026 Task 9.
@inproceedings{sriram-etal-2026-databees,
    title = "{D}ata{B}ees at {S}em{E}val-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization",
    author = "Sriram, Tanisha and
      Shankar, Sathvika and
      Anand, Sowmya and
      Sivanaiah, Rajalakshmi and
      S, Angel Deborah and
      Thankanadar, Mirnalinee",
    editor = "Kochmar, Ekaterina and
      Ghosh, Debanjan and
      North, Kai and
      Komachi, Mamoru",
    booktitle = "Proceedings of the 20th {I}nternational {W}orkshop on {S}emantic {E}valuation (2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.semeval-1.268/",
    pages = "2120--2125",
    ISBN = "979-8-89176-414-9",
    abstract = "This paper describes our submission to SemEval-2026 Task 9, Subtask 1: Multilingual Text Classification Challenge {---} Polarization Detection. Our focus is on how classical and transformer-based models compare when applied to multilingual polarization detection. We aim to understand where each type tends to do well and where it breaks down, particularly once you move from high-resource to low-resource settings. Our experimental setup evaluates classical machine learning models (TFIDF with Naive Bayes, Logistic Regression, and Linear SVM) alongside language-specific transformer models across multiple languages. For Arabic, Bengali, German, Italian, and Spanish, we leveraged both multilingual and monolingual pre-trained transformers such as mBERT, XLM-R, AraBERTv2, BanglaBERT, and BETO. We compare individual classical and transformer-based models to identify which modeling choices work best for each language. Our results varied substantially across languages. We achieved our best leaderboard rankings in Bengali (6th out of 48 teams) and Italian (6th out of 43 teams), while performance was lower in Arabic (33rd out of 44), German (41st out of 44), and Spanish (46th out of 48). The study highlights the value of comparing classical and transformer-based approaches for multilingual polarization detection and identifies language-specific challenges for future improvement."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sriram-etal-2026-databees">
<titleInfo>
<title>DataBees at SemEval-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanisha</namePart>
<namePart type="family">Sriram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sathvika</namePart>
<namePart type="family">Shankar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sowmya</namePart>
<namePart type="family">Anand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajalakshmi</namePart>
<namePart type="family">Sivanaiah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angel</namePart>
<namePart type="given">Deborah</namePart>
<namePart type="family">S</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mirnalinee</namePart>
<namePart type="family">Thankanadar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Workshop on Semantic Evaluation (2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamoru</namePart>
<namePart type="family">Komachi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-414-9</identifier>
</relatedItem>
<abstract>This paper describes our submission to SemEval-2026 Task 9, Subtask 1: Multilingual Text Classification Challenge — Polarization Detection. Our focus is on how classical and transformer-based models compare when applied to multilingual polarization detection. We aim to understand where each type tends to do well and where it breaks down, particularly once you move from high-resource to low-resource settings. Our experimental setup evaluates classical machine learning models (TFIDF with Naive Bayes, Logistic Regression, and Linear SVM) alongside language-specific transformer models across multiple languages. For Arabic, Bengali, German, Italian, and Spanish, we leveraged both multilingual and monolingual pre-trained transformers such as mBERT, XLM-R, AraBERTv2, BanglaBERT, and BETO. We compare individual classical and transformer-based models to identify which modeling choices work best for each language. Our results varied substantially across languages. We achieved our best leaderboard rankings in Bengali (6th out of 48 teams) and Italian (6th out of 43 teams), while performance was lower in Arabic (33rd out of 44), German (41st out of 44), and Spanish (46th out of 48). The study highlights the value of comparing classical and transformer-based approaches for multilingual polarization detection and identifies language-specific challenges for future improvement.</abstract>
<identifier type="citekey">sriram-etal-2026-databees</identifier>
<location>
<url>https://aclanthology.org/2026.semeval-1.268/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>2120</start>
<end>2125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DataBees at SemEval-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization
%A Sriram, Tanisha
%A Shankar, Sathvika
%A Anand, Sowmya
%A Sivanaiah, Rajalakshmi
%A S, Angel Deborah
%A Thankanadar, Mirnalinee
%Y Kochmar, Ekaterina
%Y Ghosh, Debanjan
%Y North, Kai
%Y Komachi, Mamoru
%S Proceedings of the 20th International Workshop on Semantic Evaluation (2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-414-9
%F sriram-etal-2026-databees
%X This paper describes our submission to SemEval-2026 Task 9, Subtask 1: Multilingual Text Classification Challenge — Polarization Detection. Our focus is on how classical and transformer-based models compare when applied to multilingual polarization detection. We aim to understand where each type tends to do well and where it breaks down, particularly once you move from high-resource to low-resource settings. Our experimental setup evaluates classical machine learning models (TFIDF with Naive Bayes, Logistic Regression, and Linear SVM) alongside language-specific transformer models across multiple languages. For Arabic, Bengali, German, Italian, and Spanish, we leveraged both multilingual and monolingual pre-trained transformers such as mBERT, XLM-R, AraBERTv2, BanglaBERT, and BETO. We compare individual classical and transformer-based models to identify which modeling choices work best for each language. Our results varied substantially across languages. We achieved our best leaderboard rankings in Bengali (6th out of 48 teams) and Italian (6th out of 43 teams), while performance was lower in Arabic (33rd out of 44), German (41st out of 44), and Spanish (46th out of 48). The study highlights the value of comparing classical and transformer-based approaches for multilingual polarization detection and identifies language-specific challenges for future improvement.
%U https://aclanthology.org/2026.semeval-1.268/
%P 2120-2125
Markdown (Informal)
[DataBees at SemEval-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization](https://aclanthology.org/2026.semeval-1.268/) (Sriram et al., SemEval 2026)
ACL
- Tanisha Sriram, Sathvika Shankar, Sowmya Anand, Rajalakshmi Sivanaiah, Angel Deborah S, and Mirnalinee Thankanadar. 2026. DataBees at SemEval-2026 Task 9: Detecting Multilingual, Multicultural and Multievent Online Polarization. In Proceedings of the 20th International Workshop on Semantic Evaluation (2026), pages 2120–2125, San Diego, California, USA. Association for Computational Linguistics.