@inproceedings{garcia-diaz-etal-2022-umuteam-tamilnlp,
title = "{UMUT}eam@{T}amil{NLP}-{ACL}2022: Abusive Detection in {T}amil using Linguistic Features and Transformers",
author = "Garc{\'\i}a-D{\'\i}az, Jos{\'e} and
Valencia-Garcia, Manuel and
Valencia-Garc{\'\i}a, Rafael",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Krishnamurthy, Parameswari and
Sherly, Elizabeth and
Mahesan, Sinnathamby",
booktitle = "Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.dravidianlangtech-1.7",
doi = "10.18653/v1/2022.dravidianlangtech-1.7",
pages = "45--50",
abstract = "Social media has become a dangerous place as bullies take advantage of the anonymity the Internet provides to target and intimidate vulnerable individuals and groups. In the past few years, the research community has focused on developing automatic classification tools for detecting hate-speech, its variants, and other types of abusive behaviour. However, these methods are still at an early stage in low-resource languages. With the aim of reducing this barrier, the TamilNLP shared task has proposed a multi-classification challenge for Tamil written in Tamil script and code-mixed to detect abusive comments and hope-speech. Our participation consists of a knowledge integration strategy that combines sentence embeddings from BERT, RoBERTa, FastText and a subset of language-independent linguistic features. We achieved our best result in code-mixed, reaching 3rd position with a macro-average f1-score of 35{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="garcia-diaz-etal-2022-umuteam-tamilnlp">
<titleInfo>
<title>UMUTeam@TamilNLP-ACL2022: Abusive Detection in Tamil using Linguistic Features and Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="family">García-Díaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Valencia-Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Valencia-García</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Parameswari</namePart>
<namePart type="family">Krishnamurthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sinnathamby</namePart>
<namePart type="family">Mahesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social media has become a dangerous place as bullies take advantage of the anonymity the Internet provides to target and intimidate vulnerable individuals and groups. In the past few years, the research community has focused on developing automatic classification tools for detecting hate-speech, its variants, and other types of abusive behaviour. However, these methods are still at an early stage in low-resource languages. With the aim of reducing this barrier, the TamilNLP shared task has proposed a multi-classification challenge for Tamil written in Tamil script and code-mixed to detect abusive comments and hope-speech. Our participation consists of a knowledge integration strategy that combines sentence embeddings from BERT, RoBERTa, FastText and a subset of language-independent linguistic features. We achieved our best result in code-mixed, reaching 3rd position with a macro-average f1-score of 35%.</abstract>
<identifier type="citekey">garcia-diaz-etal-2022-umuteam-tamilnlp</identifier>
<identifier type="doi">10.18653/v1/2022.dravidianlangtech-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.dravidianlangtech-1.7</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>45</start>
<end>50</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UMUTeam@TamilNLP-ACL2022: Abusive Detection in Tamil using Linguistic Features and Transformers
%A García-Díaz, José
%A Valencia-Garcia, Manuel
%A Valencia-García, Rafael
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Krishnamurthy, Parameswari
%Y Sherly, Elizabeth
%Y Mahesan, Sinnathamby
%S Proceedings of the Second Workshop on Speech and Language Technologies for Dravidian Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F garcia-diaz-etal-2022-umuteam-tamilnlp
%X Social media has become a dangerous place as bullies take advantage of the anonymity the Internet provides to target and intimidate vulnerable individuals and groups. In the past few years, the research community has focused on developing automatic classification tools for detecting hate-speech, its variants, and other types of abusive behaviour. However, these methods are still at an early stage in low-resource languages. With the aim of reducing this barrier, the TamilNLP shared task has proposed a multi-classification challenge for Tamil written in Tamil script and code-mixed to detect abusive comments and hope-speech. Our participation consists of a knowledge integration strategy that combines sentence embeddings from BERT, RoBERTa, FastText and a subset of language-independent linguistic features. We achieved our best result in code-mixed, reaching 3rd position with a macro-average f1-score of 35%.
%R 10.18653/v1/2022.dravidianlangtech-1.7
%U https://aclanthology.org/2022.dravidianlangtech-1.7
%U https://doi.org/10.18653/v1/2022.dravidianlangtech-1.7
%P 45-50
Markdown (Informal)
[UMUTeam@TamilNLP-ACL2022: Abusive Detection in Tamil using Linguistic Features and Transformers](https://aclanthology.org/2022.dravidianlangtech-1.7) (García-Díaz et al., DravidianLangTech 2022)
ACL