@inproceedings{bui-etal-2026-statements,
title = "From If-Statements to {ML} Pipelines: Revisiting Bias in Code-Generation",
author = "Bui, Minh Duc and
Heilmann, Xenia and
Cerrato, Mattia and
Mager, Manuel and
Von Der Wense, Katharina",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.193/",
pages = "3958--3972",
ISBN = "979-8-89176-395-1",
abstract = "Prior work evaluates code generation bias primarily through simple conditional statements, which represent only a narrow slice of real-world programming and reveal solely overt, explicitly encoded bias. We demonstrate that this approach dramatically underestimates real-world bias by examining a more realistic task: generating machine learning (ML) pipelines. Testing both code-specialized and general-instruction large language models, we find that ML pipelines exhibit substantially greater bias than simple conditionals across all conditions: standard generation, with varying prompt-based mitigation strategies, varying numbers of attributes, and different ML pipeline difficulty levels. Even attribute selection alone, the simplest pipeline difficulty, shows higher bias compared to conditionals, demonstrating that ML pipelines inherently amplify bias beyond what isolated conditionals reveal. Critically, we uncover a stark asymmetry: models maintain equivalent bias detection performance on both simple conditionals and ML pipelines, revealing that models recognize bias equally well in both contexts yet generate significantly more biased code in ML pipelines. These findings challenge simple conditionals as valid proxies for bias evaluation and suggest current benchmarks mischaracterize model safety in practical deployment contexts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bui-etal-2026-statements">
<titleInfo>
<title>From If-Statements to ML Pipelines: Revisiting Bias in Code-Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Minh</namePart>
<namePart type="given">Duc</namePart>
<namePart type="family">Bui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xenia</namePart>
<namePart type="family">Heilmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mattia</namePart>
<namePart type="family">Cerrato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Von Der Wense</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Prior work evaluates code generation bias primarily through simple conditional statements, which represent only a narrow slice of real-world programming and reveal solely overt, explicitly encoded bias. We demonstrate that this approach dramatically underestimates real-world bias by examining a more realistic task: generating machine learning (ML) pipelines. Testing both code-specialized and general-instruction large language models, we find that ML pipelines exhibit substantially greater bias than simple conditionals across all conditions: standard generation, with varying prompt-based mitigation strategies, varying numbers of attributes, and different ML pipeline difficulty levels. Even attribute selection alone, the simplest pipeline difficulty, shows higher bias compared to conditionals, demonstrating that ML pipelines inherently amplify bias beyond what isolated conditionals reveal. Critically, we uncover a stark asymmetry: models maintain equivalent bias detection performance on both simple conditionals and ML pipelines, revealing that models recognize bias equally well in both contexts yet generate significantly more biased code in ML pipelines. These findings challenge simple conditionals as valid proxies for bias evaluation and suggest current benchmarks mischaracterize model safety in practical deployment contexts.</abstract>
<identifier type="citekey">bui-etal-2026-statements</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.193/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>3958</start>
<end>3972</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From If-Statements to ML Pipelines: Revisiting Bias in Code-Generation
%A Bui, Minh Duc
%A Heilmann, Xenia
%A Cerrato, Mattia
%A Mager, Manuel
%A Von Der Wense, Katharina
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F bui-etal-2026-statements
%X Prior work evaluates code generation bias primarily through simple conditional statements, which represent only a narrow slice of real-world programming and reveal solely overt, explicitly encoded bias. We demonstrate that this approach dramatically underestimates real-world bias by examining a more realistic task: generating machine learning (ML) pipelines. Testing both code-specialized and general-instruction large language models, we find that ML pipelines exhibit substantially greater bias than simple conditionals across all conditions: standard generation, with varying prompt-based mitigation strategies, varying numbers of attributes, and different ML pipeline difficulty levels. Even attribute selection alone, the simplest pipeline difficulty, shows higher bias compared to conditionals, demonstrating that ML pipelines inherently amplify bias beyond what isolated conditionals reveal. Critically, we uncover a stark asymmetry: models maintain equivalent bias detection performance on both simple conditionals and ML pipelines, revealing that models recognize bias equally well in both contexts yet generate significantly more biased code in ML pipelines. These findings challenge simple conditionals as valid proxies for bias evaluation and suggest current benchmarks mischaracterize model safety in practical deployment contexts.
%U https://aclanthology.org/2026.findings-acl.193/
%P 3958-3972
Markdown (Informal)
[From If-Statements to ML Pipelines: Revisiting Bias in Code-Generation](https://aclanthology.org/2026.findings-acl.193/) (Bui et al., Findings 2026)
ACL