% Conference paper from the LT-EDI 2024 workshop proceedings.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing styles keep their capitals; accented letters in editor names
% are written as LaTeX escapes.
@inproceedings{pokrywka-jassem-2024-kubapok,
  title     = {kubapok@{LT-EDI} 2024: Evaluating Transformer Models for Hate Speech Detection in {Tamil}},
  author    = {Pokrywka, Jakub and
               Jassem, Krzysztof},
  editor    = {Chakravarthi, Bharathi Raja and
               B, Bharathi and
               Buitelaar, Paul and
               Durairaj, Thenmozhi and
               Kov{\'a}cs, Gy{\"o}rgy and
               Garc{\'\i}a Cumbreras, Miguel {\'A}ngel},
  booktitle = {Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion},
  month     = mar,
  year      = {2024},
  address   = {St. Julian's, Malta},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.ltedi-1.22},
  pages     = {196--199},
  abstract  = {We describe the second-place submission for the shared task organized at the Fourth Workshop on Language Technology for Equality, Diversity, and Inclusion (LT-EDI-2024). The task focuses on detecting caste/migration hate speech in Tamil. The included texts involve the Tamil language in both Tamil script and transliterated into Latin script, with some texts also in English. Considering different scripts, we examined the performance of 12 transformer language models on the dev set. Our analysis revealed that for the whole dataset, the model google/muril-large-cased performs the best. We used an ensemble of several models for the final challenge submission, achieving 0.81 for the test dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection containing a single record, identified by the citekey pokrywka-jassem-2024-kubapok. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pokrywka-jassem-2024-kubapok">
<titleInfo>
<title>kubapok@LT-EDI 2024: Evaluating Transformer Models for Hate Speech Detection in Tamil</title>
</titleInfo>
<!-- Authors of the paper, one name element per person. -->
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Pokrywka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Jassem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its title, editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thenmozhi</namePart>
<namePart type="family">Durairaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">György</namePart>
<namePart type="family">Kovács</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="given">Ángel</namePart>
<namePart type="family">García Cumbreras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe the second-place submission for the shared task organized at the Fourth Workshop on Language Technology for Equality, Diversity, and Inclusion (LT-EDI-2024). The task focuses on detecting caste/migration hate speech in Tamil. The included texts involve the Tamil language in both Tamil script and transliterated into Latin script, with some texts also in English. Considering different scripts, we examined the performance of 12 transformer language models on the dev set. Our analysis revealed that for the whole dataset, the model google/muril-large-cased performs the best. We used an ensemble of several models for the final challenge submission, achieving 0.81 for the test dataset.</abstract>
<identifier type="citekey">pokrywka-jassem-2024-kubapok</identifier>
<location>
<url>https://aclanthology.org/2024.ltedi-1.22</url>
</location>
<!-- Position of the paper within the host item: date and page extent. -->
<part>
<date>2024-03</date>
<extent unit="page">
<start>196</start>
<end>199</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T kubapok@LT-EDI 2024: Evaluating Transformer Models for Hate Speech Detection in Tamil
%A Pokrywka, Jakub
%A Jassem, Krzysztof
%Y Chakravarthi, Bharathi Raja
%Y B, Bharathi
%Y Buitelaar, Paul
%Y Durairaj, Thenmozhi
%Y Kovács, György
%Y García Cumbreras, Miguel Ángel
%S Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F pokrywka-jassem-2024-kubapok
%X We describe the second-place submission for the shared task organized at the Fourth Workshop on Language Technology for Equality, Diversity, and Inclusion (LT-EDI-2024). The task focuses on detecting caste/migration hate speech in Tamil. The included texts involve the Tamil language in both Tamil script and transliterated into Latin script, with some texts also in English. Considering different scripts, we examined the performance of 12 transformer language models on the dev set. Our analysis revealed that for the whole dataset, the model google/muril-large-cased performs the best. We used an ensemble of several models for the final challenge submission, achieving 0.81 for the test dataset.
%U https://aclanthology.org/2024.ltedi-1.22
%P 196-199
Markdown (Informal)
[kubapok@LT-EDI 2024: Evaluating Transformer Models for Hate Speech Detection in Tamil](https://aclanthology.org/2024.ltedi-1.22) (Pokrywka & Jassem, LTEDI-WS 2024)
ACL