@inproceedings{mouselinos-etal-2023-simple,
title = "A Simple, Yet Effective Approach to Finding Biases in Code Generation",
author = "Mouselinos, Spyridon and
Malinowski, Mateusz and
Michalewski, Henryk",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.718",
doi = "10.18653/v1/2023.findings-acl.718",
pages = "11299--11329",
abstract = "Recently, high-performing code generation systems based on large language models have surfaced. They are trained on massive corpora containing much more natural text than actual executable computer code. This work shows that current code generation systems exhibit undesired biases inherited from their large language model backbones, which can reduce the quality of the generated code under specific circumstances. To investigate the effect, we propose the {``}block of influence{''} concept, which enables a modular decomposition and analysis of the coding challenges. We introduce an automated intervention mechanism reminiscent of adversarial testing that exposes undesired biases through the failure modes of the models under test. Finally, we demonstrate how our framework can be used as a data transformation technique during fine-tuning, acting as a mitigation strategy for these biases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mouselinos-etal-2023-simple">
<titleInfo>
<title>A Simple, Yet Effective Approach to Finding Biases in Code Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Spyridon</namePart>
<namePart type="family">Mouselinos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mateusz</namePart>
<namePart type="family">Malinowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henryk</namePart>
<namePart type="family">Michalewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, high-performing code generation systems based on large language models have surfaced. They are trained on massive corpora containing much more natural text than actual executable computer code. This work shows that current code generation systems exhibit undesired biases inherited from their large language model backbones, which can reduce the quality of the generated code under specific circumstances. To investigate the effect, we propose the “block of influence” concept, which enables a modular decomposition and analysis of the coding challenges. We introduce an automated intervention mechanism reminiscent of adversarial testing that exposes undesired biases through the failure modes of the models under test. Finally, we demonstrate how our framework can be used as a data transformation technique during fine-tuning, acting as a mitigation strategy for these biases.</abstract>
<identifier type="citekey">mouselinos-etal-2023-simple</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.718</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.718</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>11299</start>
<end>11329</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Simple, Yet Effective Approach to Finding Biases in Code Generation
%A Mouselinos, Spyridon
%A Malinowski, Mateusz
%A Michalewski, Henryk
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F mouselinos-etal-2023-simple
%X Recently, high-performing code generation systems based on large language models have surfaced. They are trained on massive corpora containing much more natural text than actual executable computer code. This work shows that current code generation systems exhibit undesired biases inherited from their large language model backbones, which can reduce the quality of the generated code under specific circumstances. To investigate the effect, we propose the “block of influence” concept, which enables a modular decomposition and analysis of the coding challenges. We introduce an automated intervention mechanism reminiscent of adversarial testing that exposes undesired biases through the failure modes of the models under test. Finally, we demonstrate how our framework can be used as a data transformation technique during fine-tuning, acting as a mitigation strategy for these biases.
%R 10.18653/v1/2023.findings-acl.718
%U https://aclanthology.org/2023.findings-acl.718
%U https://doi.org/10.18653/v1/2023.findings-acl.718
%P 11299-11329
Markdown (Informal)
[A Simple, Yet Effective Approach to Finding Biases in Code Generation](https://aclanthology.org/2023.findings-acl.718) (Mouselinos et al., Findings 2023)
ACL