@inproceedings{beniwal-etal-2025-unityai,
title = "{U}nity{AI} Guard: Pioneering Toxicity Detection Across Low-Resource {I}ndian Languages",
author = "Beniwal, Himanshu and
Venkat, Reddybathuni and
Kumar, Rohit and
Srivibhav, Birudugadda and
Jain, Daksh and
Doddi, Pavan Deekshith and
Dhande, Eshwar and
Ananth, Adithya and
Kuldeep and
Singh, Mayank",
editor = {Habernal, Ivan and
Schulam, Peter and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-demos.33/",
pages = "471--479",
ISBN = "979-8-89176-334-0",
abstract = "This work introduces UnityAI-Guard, a framework for binary toxicity classification targeting low-resource Indian languages. While existing systems predominantly cater to high-resource languages, UnityAI-Guard addresses this critical gap by developing state-of-the-art models for identifying toxic content across diverse Brahmic/Indic scripts. Our approach achieves an impressive average F1-score of 84.23{\%} across seven languages, leveraging a dataset of 567k training instances and 30k manually verified test instances. By advancing multilingual content moderation for linguistically diverse regions, UnityAI-Guard also provides public API access to foster broader adoption and application."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="beniwal-etal-2025-unityai">
<titleInfo>
<title>UnityAI Guard: Pioneering Toxicity Detection Across Low-Resource Indian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Himanshu</namePart>
<namePart type="family">Beniwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reddybathuni</namePart>
<namePart type="family">Venkat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rohit</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Birudugadda</namePart>
<namePart type="family">Srivibhav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daksh</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavan</namePart>
<namePart type="given">Deekshith</namePart>
<namePart type="family">Doddi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eshwar</namePart>
<namePart type="family">Dhande</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adithya</namePart>
<namePart type="family">Ananth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart>Kuldeep</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mayank</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Schulam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-334-0</identifier>
</relatedItem>
<abstract>This work introduces UnityAI-Guard, a framework for binary toxicity classification targeting low-resource Indian languages. While existing systems predominantly cater to high-resource languages, UnityAI-Guard addresses this critical gap by developing state-of-the-art models for identifying toxic content across diverse Brahmic/Indic scripts. Our approach achieves an impressive average F1-score of 84.23% across seven languages, leveraging a dataset of 567k training instances and 30k manually verified test instances. By advancing multilingual content moderation for linguistically diverse regions, UnityAI-Guard also provides public API access to foster broader adoption and application.</abstract>
<identifier type="citekey">beniwal-etal-2025-unityai</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-demos.33/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>471</start>
<end>479</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UnityAI Guard: Pioneering Toxicity Detection Across Low-Resource Indian Languages
%A Beniwal, Himanshu
%A Venkat, Reddybathuni
%A Kumar, Rohit
%A Srivibhav, Birudugadda
%A Jain, Daksh
%A Doddi, Pavan Deekshith
%A Dhande, Eshwar
%A Ananth, Adithya
%A Kuldeep
%A Singh, Mayank
%Y Habernal, Ivan
%Y Schulam, Peter
%Y Tiedemann, Jörg
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-334-0
%F beniwal-etal-2025-unityai
%X This work introduces UnityAI-Guard, a framework for binary toxicity classification targeting low-resource Indian languages. While existing systems predominantly cater to high-resource languages, UnityAI-Guard addresses this critical gap by developing state-of-the-art models for identifying toxic content across diverse Brahmic/Indic scripts. Our approach achieves an impressive average F1-score of 84.23% across seven languages, leveraging a dataset of 567k training instances and 30k manually verified test instances. By advancing multilingual content moderation for linguistically diverse regions, UnityAI-Guard also provides public API access to foster broader adoption and application.
%U https://aclanthology.org/2025.emnlp-demos.33/
%P 471-479
Markdown (Informal)
[UnityAI Guard: Pioneering Toxicity Detection Across Low-Resource Indian Languages](https://aclanthology.org/2025.emnlp-demos.33/) (Beniwal et al., EMNLP 2025)
ACL
- Himanshu Beniwal, Reddybathuni Venkat, Rohit Kumar, Birudugadda Srivibhav, Daksh Jain, Pavan Deekshith Doddi, Eshwar Dhande, Adithya Ananth, Kuldeep, and Mayank Singh. 2025. UnityAI Guard: Pioneering Toxicity Detection Across Low-Resource Indian Languages. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 471–479, Suzhou, China. Association for Computational Linguistics.