@inproceedings{nahurna-romanyshyn-2025-gender,
title = "Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for {U}krainian Language Processing",
author = "Nahurna, Olha and
Romanyshyn, Mariana",
editor = "Romanyshyn, Mariana",
booktitle = "Proceedings of the Fourth Ukrainian Natural Language Processing Workshop (UNLP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria (online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.unlp-1.16/",
doi = "10.18653/v1/2025.unlp-1.16",
pages = "147--161",
ISBN = "979-8-89176-269-5",
abstract = "This paper presents a pipeline for generating gender-balanced datasets through sentence-level gender swapping, addressing the gender-imbalance issue in Ukrainian texts. We select sentences with gender-marked entities, focusing on job titles, generate their inverted alternatives using LLMs and human-in-the-loop, and fine-tune Aya-101 on the resulting dataset for the task of gender swapping. Additionally, we train a Named Entity Recognition (NER) model on gender-balanced data, demonstrating its ability to better recognize gendered entities. The findings unveil the potential of gender-balanced datasets to enhance model robustness and support more fair language processing. Finally, we make a gender-swapped version of NER-UK{\textasciitilde}2.0 and the fine-tuned Aya-101 model available for download and further research."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nahurna-romanyshyn-2025-gender">
<titleInfo>
<title>Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for Ukrainian Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olha</namePart>
<namePart type="family">Nahurna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Ukrainian Natural Language Processing Workshop (UNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria (online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-269-5</identifier>
</relatedItem>
<abstract>This paper presents a pipeline for generating gender-balanced datasets through sentence-level gender swapping, addressing the gender-imbalance issue in Ukrainian texts. We select sentences with gender-marked entities, focusing on job titles, generate their inverted alternatives using LLMs and human-in-the-loop, and fine-tune Aya-101 on the resulting dataset for the task of gender swapping. Additionally, we train a Named Entity Recognition (NER) model on gender-balanced data, demonstrating its ability to better recognize gendered entities. The findings unveil the potential of gender-balanced datasets to enhance model robustness and support more fair language processing. Finally, we make a gender-swapped version of NER-UK~2.0 and the fine-tuned Aya-101 model available for download and further research.</abstract>
<identifier type="citekey">nahurna-romanyshyn-2025-gender</identifier>
<identifier type="doi">10.18653/v1/2025.unlp-1.16</identifier>
<location>
<url>https://aclanthology.org/2025.unlp-1.16/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>147</start>
<end>161</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for Ukrainian Language Processing
%A Nahurna, Olha
%A Romanyshyn, Mariana
%Y Romanyshyn, Mariana
%S Proceedings of the Fourth Ukrainian Natural Language Processing Workshop (UNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria (online)
%@ 979-8-89176-269-5
%F nahurna-romanyshyn-2025-gender
%X This paper presents a pipeline for generating gender-balanced datasets through sentence-level gender swapping, addressing the gender-imbalance issue in Ukrainian texts. We select sentences with gender-marked entities, focusing on job titles, generate their inverted alternatives using LLMs and human-in-the-loop, and fine-tune Aya-101 on the resulting dataset for the task of gender swapping. Additionally, we train a Named Entity Recognition (NER) model on gender-balanced data, demonstrating its ability to better recognize gendered entities. The findings unveil the potential of gender-balanced datasets to enhance model robustness and support more fair language processing. Finally, we make a gender-swapped version of NER-UK~2.0 and the fine-tuned Aya-101 model available for download and further research.
%R 10.18653/v1/2025.unlp-1.16
%U https://aclanthology.org/2025.unlp-1.16/
%U https://doi.org/10.18653/v1/2025.unlp-1.16
%P 147-161
Markdown (Informal)
[Gender Swapping as a Data Augmentation Technique: Developing Gender-Balanced Datasets for Ukrainian Language Processing](https://aclanthology.org/2025.unlp-1.16/) (Nahurna & Romanyshyn, UNLP 2025)
ACL