% BibTeX record for ACL Anthology paper 2025.ranlp-1.89.
% NOTE(review): `address` appears to hold the conference venue while the
% publisher's city is embedded in `publisher` -- confirm before normalising.
@inproceedings{mut-altin-saggion-2025-investigating,
  title     = {Investigating Large Language Models' ({LLM}s) Capabilities for Sexism Detection on a Low-Resource Language},
  author    = {Mut Altin, Lutfiye Seda and
               Saggion, Horacio},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.89/},
  pages     = {771--779},
  abstract  = {Automatic detection of sexist language on social media is gaining attention due to its harmful societal impact and technical challenges it presents. The limited availability of data resources in some languages restricts the development of effective tools to fight the spread of such content. In this work, we investigated various methods to improve the efficiency of automatic detection of sexism and its subtypes in a low-resource language, Turkish. We first experimented with various LLM prompting strategies for classification and then investigated the impact of different data augmentation strategies, including both synthetic data generation with LLMs (GPT, DeepSeek) and translation-based augmentation using English and Spanish data. Finally, we examined whether these augmentation methods would improve model performance of a trained neural network (BERT). Our benchmarking results show that fine-tuned LLM (GPT-4o-mini) achieved the best performance compared to zero-shot, few-shot, Chain-of-Thought prompt classification and training a neural network (BERT) including the data augmented in different ways (synthetic generation, translation). Our results also indicated that, for the classification of more granular classes, in other words, more specific tasks, training a neural network generally performed better than prompt-based classification using an LLM.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for ACL Anthology paper 2025.ranlp-1.89. -->
  <mods ID="mut-altin-saggion-2025-investigating">
    <titleInfo>
      <title>Investigating Large Language Models’ (LLMs) Capabilities for Sexism Detection on a Low-Resource Language</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Lutfiye</namePart>
      <namePart type="given">Seda</namePart>
      <namePart type="family">Mut Altin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Horacio</namePart>
      <namePart type="family">Saggion</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Automatic detection of sexist language on social media is gaining attention due to its harmful societal impact and technical challenges it presents. The limited availability of data resources in some languages restricts the development of effective tools to fight the spread of such content. In this work, we investigated various methods to improve the efficiency of automatic detection of sexism and its subtypes in a low-resource language, Turkish. We first experimented with various LLM prompting strategies for classification and then investigated the impact of different data augmentation strategies, including both synthetic data generation with LLMs (GPT, DeepSeek) and translation-based augmentation using English and Spanish data. Finally, we examined whether these augmentation methods would improve model performance of a trained neural network (BERT). Our benchmarking results show that fine-tuned LLM (GPT-4o-mini) achieved the best performance compared to zero-shot, few-shot, Chain-of-Thought prompt classification and training a neural network (BERT) including the data augmented in different ways (synthetic generation, translation). Our results also indicated that, for the classification of more granular classes, in other words, more specific tasks, training a neural network generally performed better than prompt-based classification using an LLM.</abstract>
    <identifier type="citekey">mut-altin-saggion-2025-investigating</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.89/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>771</start>
        <end>779</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating Large Language Models’ (LLMs) Capabilities for Sexism Detection on a Low-Resource Language
%A Mut Altin, Lutfiye Seda
%A Saggion, Horacio
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F mut-altin-saggion-2025-investigating
%X Automatic detection of sexist language on social media is gaining attention due to its harmful societal impact and technical challenges it presents. The limited availability of data resources in some languages restricts the development of effective tools to fight the spread of such content. In this work, we investigated various methods to improve the efficiency of automatic detection of sexism and its subtypes in a low-resource language, Turkish. We first experimented with various LLM prompting strategies for classification and then investigated the impact of different data augmentation strategies, including both synthetic data generation with LLMs (GPT, DeepSeek) and translation-based augmentation using English and Spanish data. Finally, we examined whether these augmentation methods would improve model performance of a trained neural network (BERT). Our benchmarking results show that fine-tuned LLM (GPT-4o-mini) achieved the best performance compared to zero-shot, few-shot, Chain-of-Thought prompt classification and training a neural network (BERT) including the data augmented in different ways (synthetic generation, translation). Our results also indicated that, for the classification of more granular classes, in other words, more specific tasks, training a neural network generally performed better than prompt-based classification using an LLM.
%U https://aclanthology.org/2025.ranlp-1.89/
%P 771-779
Markdown (Informal)
[Investigating Large Language Models’ (LLMs) Capabilities for Sexism Detection on a Low-Resource Language](https://aclanthology.org/2025.ranlp-1.89/) (Mut Altin & Saggion, RANLP 2025)
ACL