% BibTeX record for a workshop paper in the LT-EDI 2026 proceedings (ACL Anthology).
% Names use the unambiguous "Last, First" form; accented letters are written as
% brace-wrapped LaTeX escapes so classic BibTeX treats each one as a single letter.
% The `abstract` field is informational only: styles ignore fields they do not know.
@inproceedings{kattamuri-etal-2026-equilibrium,
  title     = {Equilibrium Dynamics and Mitigation of Gender Bias in Synthetically Generated Data},
  author    = {Kattamuri, Ashish and
               Vats, Arpita and
               Fartale, Harshwardhan and
               Raja, Rahul and
               Moharir, Akshata Kishore and
               Prasad, Ishita},
  editor    = {Chakravarthi, Bharathi Raja and
               B, Bharathi and
               Buitelaar, Paul and
               Thenmozhi, Durairaj and
               Garc{\'i}a Cumbreras, Miguel {\'A}ngel and
               Jim{\'e}nez Zafra, Salud Mar{\'i}a},
  booktitle = {Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion},
  month     = jul,
  year      = {2026},
  address   = {Virtual (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.ltedi-1.4/},
  pages     = {37--42},
  isbn      = {979-8-89176-424-8},
  abstract  = {Recursive prompting with large language models enables scalable synthetic dataset generation but introduces the risk of bias amplification. We investigate gender bias dynamics across three generations of recursive text generation using three complementary evaluation frameworks: rule-based pattern matching, embedding based semantic similarity, and downstream task performance. Experiments with three initial bias levels (0.1, 0.3, 0.6) and four mitigation strategies reveal equilibrium dynamics rather than monotonic amplification. The low initial bias amplifies toward the model{'}s inherent bias level (+ 36{\%}), whereas the high initial bias decays toward it (-26{\%}). Among mitigation methods, contrastive augmentation, which introduces gender-swapped variants, achieves significant downstream bias reduction (98.8{\%} for low initial bias and 91{\%} on average) despite producing higher embedding-based bias scores. This paradox demonstrates that semantic similarity metrics may diverge from behavioral fairness outcomes, highlighting the need for multidimensional evaluation in responsible synthetic data generation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the same paper: one <mods> entry inside a collection wrapper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kattamuri-etal-2026-equilibrium">
    <!-- Paper title -->
    <titleInfo>
      <title>Equilibrium Dynamics and Mitigation of Gender Bias in Synthetically Generated Data</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Ashish</namePart>
      <namePart type="family">Kattamuri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arpita</namePart>
      <namePart type="family">Vats</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Harshwardhan</namePart>
      <namePart type="family">Fartale</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rahul</namePart>
      <namePart type="family">Raja</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akshata</namePart>
      <namePart type="given">Kishore</namePart>
      <namePart type="family">Moharir</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ishita</namePart>
      <namePart type="family">Prasad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="given">Raja</namePart>
        <namePart type="family">Chakravarthi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bharathi</namePart>
        <namePart type="family">B</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Buitelaar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Durairaj</namePart>
        <namePart type="family">Thenmozhi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Miguel</namePart>
        <namePart type="given">Ángel</namePart>
        <namePart type="family">García Cumbreras</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Salud</namePart>
        <namePart type="given">María</namePart>
        <namePart type="family">Jiménez Zafra</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Virtual (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-424-8</identifier>
    </relatedItem>
    <abstract>Recursive prompting with large language models enables scalable synthetic dataset generation but introduces the risk of bias amplification. We investigate gender bias dynamics across three generations of recursive text generation using three complementary evaluation frameworks: rule-based pattern matching, embedding based semantic similarity, and downstream task performance. Experiments with three initial bias levels (0.1, 0.3, 0.6) and four mitigation strategies reveal equilibrium dynamics rather than monotonic amplification. The low initial bias amplifies toward the model’s inherent bias level (+ 36%), whereas the high initial bias decays toward it (-26%). Among mitigation methods, contrastive augmentation, which introduces gender-swapped variants, achieves significant downstream bias reduction (98.8% for low initial bias and 91% on average) despite producing higher embedding-based bias scores. This paradox demonstrates that semantic similarity metrics may diverge from behavioral fairness outcomes, highlighting the need for multidimensional evaluation in responsible synthetic data generation.</abstract>
    <!-- The citekey matches the mods ID above and the BibTeX entry key -->
    <identifier type="citekey">kattamuri-etal-2026-equilibrium</identifier>
    <location>
      <url>https://aclanthology.org/2026.ltedi-1.4/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>37</start>
        <end>42</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Equilibrium Dynamics and Mitigation of Gender Bias in Synthetically Generated Data
%A Kattamuri, Ashish
%A Vats, Arpita
%A Fartale, Harshwardhan
%A Raja, Rahul
%A Moharir, Akshata Kishore
%A Prasad, Ishita
%Y Chakravarthi, Bharathi Raja
%Y B, Bharathi
%Y Buitelaar, Paul
%Y Thenmozhi, Durairaj
%Y García Cumbreras, Miguel Ángel
%Y Jiménez Zafra, Salud María
%S Proceedings of the Sixth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2026
%8 July
%I Association for Computational Linguistics
%C Virtual (Online)
%@ 979-8-89176-424-8
%F kattamuri-etal-2026-equilibrium
%X Recursive prompting with large language models enables scalable synthetic dataset generation but introduces the risk of bias amplification. We investigate gender bias dynamics across three generations of recursive text generation using three complementary evaluation frameworks: rule-based pattern matching, embedding based semantic similarity, and downstream task performance. Experiments with three initial bias levels (0.1, 0.3, 0.6) and four mitigation strategies reveal equilibrium dynamics rather than monotonic amplification. The low initial bias amplifies toward the model’s inherent bias level (+ 36%), whereas the high initial bias decays toward it (-26%). Among mitigation methods, contrastive augmentation, which introduces gender-swapped variants, achieves significant downstream bias reduction (98.8% for low initial bias and 91% on average) despite producing higher embedding-based bias scores. This paradox demonstrates that semantic similarity metrics may diverge from behavioral fairness outcomes, highlighting the need for multidimensional evaluation in responsible synthetic data generation.
%U https://aclanthology.org/2026.ltedi-1.4/
%P 37-42
Markdown (Informal)
[Equilibrium Dynamics and Mitigation of Gender Bias in Synthetically Generated Data](https://aclanthology.org/2026.ltedi-1.4/) (Kattamuri et al., LTEDI 2026)
ACL