% System-description paper by team MUCIC for the ComMA shared task at ICON 2021.
% Acronyms in the title are brace-protected as whole words so that styles which
% re-case titles preserve their capitalisation.
@inproceedings{balouchzahi-etal-2021-mucic,
  title     = {{MUCIC} at {ComMA@ICON}: Multilingual Gender Biased and Communal Language Identification Using N-grams and Multilingual Sentence Encoders},
  author    = {Balouchzahi, Fazlourrahman and
               Vitman, Oxana and
               Shashirekha, Hosahalli Lakshmaiah and
               Sidorov, Grigori and
               Gelbukh, Alexander},
  editor    = {Kumar, Ritesh and
               Singh, Siddharth and
               Nandi, Enakshi and
               Ratan, Shyam and
               Devi, Laishram Niranjana and
               Lahiri, Bornini and
               Bansal, Akanksha and
               Bhagat, Akash and
               Dawer, Yogesh},
  booktitle = {Proceedings of the 18th International Conference on Natural Language Processing: Shared Task on Multilingual Gender Biased and Communal Language Identification},
  month     = dec,
  year      = {2021},
  address   = {NIT Silchar},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2021.icon-multigen.9},
  pages     = {58--63},
  abstract  = {Social media analytics are widely being explored by researchers for various applications. Prominent among them are identifying and blocking abusive contents especially targeting individuals and communities, for various reasons. The increasing abusive contents and the increasing number of users on social media demands automated tools to detect and filter the abusive contents as it is highly impossible to handle this manually. To address the challenges of detecting abusive contents, this paper describes the approaches proposed by our team MUCIC for Multilingual Gender Biased and Communal Language Identification shared task (ComMA@ICON) at International Conference on Natural Language Processing (ICON) 2021. This shared task dataset consists of code-mixed multi-script texts in Meitei, Bangla, Hindi as well as in Multilingual (a combination of Meitei, Bangla, Hindi, and English). The shared task is modeled as a multi-label Text Classification (TC) task combining word and char n-grams with vectors obtained from Multilingual Sentence Encoder (MSE) to train the Machine Learning (ML) classifiers using Pre-aggregation and Post-aggregation of labels. These approaches obtained the highest performance in the shared task for Meitei, Bangla, and Multilingual texts with instance-F1 scores of 0.350, 0.412, and 0.380 respectively using Pre-aggregation of labels.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation key balouchzahi-etal-2021-mucic. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="balouchzahi-etal-2021-mucic">
    <titleInfo>
      <title>MUCIC at ComMA@ICON: Multilingual Gender Biased and Communal Language Identification Using N-grams and Multilingual Sentence Encoders</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Fazlourrahman</namePart>
      <namePart type="family">Balouchzahi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Oxana</namePart>
      <namePart type="family">Vitman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hosahalli</namePart>
      <namePart type="given">Lakshmaiah</namePart>
      <namePart type="family">Shashirekha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Grigori</namePart>
      <namePart type="family">Sidorov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexander</namePart>
      <namePart type="family">Gelbukh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 18th International Conference on Natural Language Processing: Shared Task on Multilingual Gender Biased and Communal Language Identification</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Ritesh</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Siddharth</namePart>
        <namePart type="family">Singh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Enakshi</namePart>
        <namePart type="family">Nandi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shyam</namePart>
        <namePart type="family">Ratan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Laishram</namePart>
        <namePart type="given">Niranjana</namePart>
        <namePart type="family">Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bornini</namePart>
        <namePart type="family">Lahiri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Akanksha</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Akash</namePart>
        <namePart type="family">Bhagat</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yogesh</namePart>
        <namePart type="family">Dawer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">NIT Silchar</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Social media analytics are widely being explored by researchers for various applications. Prominent among them are identifying and blocking abusive contents especially targeting individuals and communities, for various reasons. The increasing abusive contents and the increasing number of users on social media demands automated tools to detect and filter the abusive contents as it is highly impossible to handle this manually. To address the challenges of detecting abusive contents, this paper describes the approaches proposed by our team MUCIC for Multilingual Gender Biased and Communal Language Identification shared task (ComMA@ICON) at International Conference on Natural Language Processing (ICON) 2021. This shared task dataset consists of code-mixed multi-script texts in Meitei, Bangla, Hindi as well as in Multilingual (a combination of Meitei, Bangla, Hindi, and English). The shared task is modeled as a multi-label Text Classification (TC) task combining word and char n-grams with vectors obtained from Multilingual Sentence Encoder (MSE) to train the Machine Learning (ML) classifiers using Pre-aggregation and Post-aggregation of labels. These approaches obtained the highest performance in the shared task for Meitei, Bangla, and Multilingual texts with instance-F1 scores of 0.350, 0.412, and 0.380 respectively using Pre-aggregation of labels.</abstract>
    <identifier type="citekey">balouchzahi-etal-2021-mucic</identifier>
    <location>
      <url>https://aclanthology.org/2021.icon-multigen.9</url>
    </location>
    <!-- Issue date and page range of the paper. -->
    <part>
      <date>2021-12</date>
      <extent unit="page">
        <start>58</start>
        <end>63</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MUCIC at ComMA@ICON: Multilingual Gender Biased and Communal Language Identification Using N-grams and Multilingual Sentence Encoders
%A Balouchzahi, Fazlourrahman
%A Vitman, Oxana
%A Shashirekha, Hosahalli Lakshmaiah
%A Sidorov, Grigori
%A Gelbukh, Alexander
%Y Kumar, Ritesh
%Y Singh, Siddharth
%Y Nandi, Enakshi
%Y Ratan, Shyam
%Y Devi, Laishram Niranjana
%Y Lahiri, Bornini
%Y Bansal, Akanksha
%Y Bhagat, Akash
%Y Dawer, Yogesh
%S Proceedings of the 18th International Conference on Natural Language Processing: Shared Task on Multilingual Gender Biased and Communal Language Identification
%D 2021
%8 December
%I NLP Association of India (NLPAI)
%C NIT Silchar
%F balouchzahi-etal-2021-mucic
%X Social media analytics are widely being explored by researchers for various applications. Prominent among them are identifying and blocking abusive contents especially targeting individuals and communities, for various reasons. The increasing abusive contents and the increasing number of users on social media demands automated tools to detect and filter the abusive contents as it is highly impossible to handle this manually. To address the challenges of detecting abusive contents, this paper describes the approaches proposed by our team MUCIC for Multilingual Gender Biased and Communal Language Identification shared task (ComMA@ICON) at International Conference on Natural Language Processing (ICON) 2021. This shared task dataset consists of code-mixed multi-script texts in Meitei, Bangla, Hindi as well as in Multilingual (a combination of Meitei, Bangla, Hindi, and English). The shared task is modeled as a multi-label Text Classification (TC) task combining word and char n-grams with vectors obtained from Multilingual Sentence Encoder (MSE) to train the Machine Learning (ML) classifiers using Pre-aggregation and Post-aggregation of labels. These approaches obtained the highest performance in the shared task for Meitei, Bangla, and Multilingual texts with instance-F1 scores of 0.350, 0.412, and 0.380 respectively using Pre-aggregation of labels.
%U https://aclanthology.org/2021.icon-multigen.9
%P 58-63
Markdown (Informal)
[MUCIC at ComMA@ICON: Multilingual Gender Biased and Communal Language Identification Using N-grams and Multilingual Sentence Encoders](https://aclanthology.org/2021.icon-multigen.9) (Balouchzahi et al., ICON 2021)
ACL