@inproceedings{susanto-etal-2025-multi,
title = "A Multi-Labeled Dataset for {I}ndonesian Discourse: Examining Toxicity, Polarization, and Demographics Information",
author = "Susanto, Lucky and
Wijanarko, Musa Izzanardi and
Pratama, Prasetia Anugrah and
Tang, Zilu and
Akyas, Fariz and
Hong, Traci and
Idris, Ika Karlina and
Aji, Alham Fikri and
Wijaya, Derry Tanti",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.966/",
doi = "10.18653/v1/2025.findings-acl.966",
pages = "18863--18890",
ISBN = "979-8-89176-256-5",
abstract = "Online discourse is increasingly trapped in a vicious cycle where polarizing language fuelstoxicity and vice versa. Identity, one of the most divisive issues in modern politics, oftenincreases polarization. Yet, prior NLP research has mostly treated toxicity and polarization asseparate problems. In Indonesia, the world{'}s third-largest democracy, this dynamic threatens democratic discourse, particularly in online spaces. We argue that polarization and toxicity must be studied in relation to each other. To this end, we present a novel multi-label Indonesian dataset annotated for toxicity, polarization, and annotator demographic information. Benchmarking with BERT-base models and large language models (LLMs) reveals that polarization cues improve toxicity classification and vice versa. Including demographic context further enhances polarization classification performance."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="susanto-etal-2025-multi">
<titleInfo>
<title>A Multi-Labeled Dataset for Indonesian Discourse: Examining Toxicity, Polarization, and Demographics Information</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucky</namePart>
<namePart type="family">Susanto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Musa</namePart>
<namePart type="given">Izzanardi</namePart>
<namePart type="family">Wijanarko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prasetia</namePart>
<namePart type="given">Anugrah</namePart>
<namePart type="family">Pratama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zilu</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fariz</namePart>
<namePart type="family">Akyas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Traci</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ika</namePart>
<namePart type="given">Karlina</namePart>
<namePart type="family">Idris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alham</namePart>
<namePart type="given">Fikri</namePart>
<namePart type="family">Aji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derry</namePart>
<namePart type="given">Tanti</namePart>
<namePart type="family">Wijaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Online discourse is increasingly trapped in a vicious cycle where polarizing language fuelstoxicity and vice versa. Identity, one of the most divisive issues in modern politics, oftenincreases polarization. Yet, prior NLP research has mostly treated toxicity and polarization asseparate problems. In Indonesia, the world’s third-largest democracy, this dynamic threatens democratic discourse, particularly in online spaces. We argue that polarization and toxicity must be studied in relation to each other. To this end, we present a novel multi-label Indonesian dataset annotated for toxicity, polarization, and annotator demographic information. Benchmarking with BERT-base models and large language models (LLMs) reveals that polarization cues improve toxicity classification and vice versa. Including demographic context further enhances polarization classification performance.</abstract>
<identifier type="citekey">susanto-etal-2025-multi</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.966</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.966/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>18863</start>
<end>18890</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Multi-Labeled Dataset for Indonesian Discourse: Examining Toxicity, Polarization, and Demographics Information
%A Susanto, Lucky
%A Wijanarko, Musa Izzanardi
%A Pratama, Prasetia Anugrah
%A Tang, Zilu
%A Akyas, Fariz
%A Hong, Traci
%A Idris, Ika Karlina
%A Aji, Alham Fikri
%A Wijaya, Derry Tanti
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F susanto-etal-2025-multi
%X Online discourse is increasingly trapped in a vicious cycle where polarizing language fuelstoxicity and vice versa. Identity, one of the most divisive issues in modern politics, oftenincreases polarization. Yet, prior NLP research has mostly treated toxicity and polarization asseparate problems. In Indonesia, the world’s third-largest democracy, this dynamic threatens democratic discourse, particularly in online spaces. We argue that polarization and toxicity must be studied in relation to each other. To this end, we present a novel multi-label Indonesian dataset annotated for toxicity, polarization, and annotator demographic information. Benchmarking with BERT-base models and large language models (LLMs) reveals that polarization cues improve toxicity classification and vice versa. Including demographic context further enhances polarization classification performance.
%R 10.18653/v1/2025.findings-acl.966
%U https://aclanthology.org/2025.findings-acl.966/
%U https://doi.org/10.18653/v1/2025.findings-acl.966
%P 18863-18890
Markdown (Informal)
[A Multi-Labeled Dataset for Indonesian Discourse: Examining Toxicity, Polarization, and Demographics Information](https://aclanthology.org/2025.findings-acl.966/) (Susanto et al., Findings 2025)
ACL
- Lucky Susanto, Musa Izzanardi Wijanarko, Prasetia Anugrah Pratama, Zilu Tang, Fariz Akyas, Traci Hong, Ika Karlina Idris, Alham Fikri Aji, and Derry Tanti Wijaya. 2025. A Multi-Labeled Dataset for Indonesian Discourse: Examining Toxicity, Polarization, and Demographics Information. In Findings of the Association for Computational Linguistics: ACL 2025, pages 18863–18890, Vienna, Austria. Association for Computational Linguistics.