@inproceedings{nozza-etal-2022-pipelines,
    title = "Pipelines for Social Bias Testing of Large Language Models",
    author = "Nozza, Debora and
      Bianchi, Federico and
      Hovy, Dirk",
    editor = "Fan, Angela and
      Ilic, Suzana and
      Wolf, Thomas and
      Gall{\'e}, Matthias",
    booktitle = "Proceedings of BigScience Episode {\#}5 -- Workshop on Challenges {\&} Perspectives in Creating Large Language Models",
    month = may,
    year = "2022",
    address = "virtual+Dublin",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.bigscience-1.6",
    doi = "10.18653/v1/2022.bigscience-1.6",
    pages = "68--74",
    abstract = "The maturity level of language models is now at a stage in which many companies rely on them to solve various tasks. However, while research has shown how biased and harmful these models are, systematic ways of integrating social bias tests into development pipelines are still lacking. This short paper suggests how to use these verification techniques in development pipelines. We take inspiration from software testing and suggest addressing social bias evaluation as software testing. We hope to open a discussion on the best methodologies to handle social bias testing in language models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nozza-etal-2022-pipelines">
    <titleInfo>
      <title>Pipelines for Social Bias Testing of Large Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Debora</namePart>
      <namePart type="family">Nozza</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Federico</namePart>
      <namePart type="family">Bianchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dirk</namePart>
      <namePart type="family">Hovy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of BigScience Episode #5 – Workshop on Challenges &amp; Perspectives in Creating Large Language Models</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Angela</namePart>
        <namePart type="family">Fan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Suzana</namePart>
        <namePart type="family">Ilic</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thomas</namePart>
        <namePart type="family">Wolf</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthias</namePart>
        <namePart type="family">Gallé</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">virtual+Dublin</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The maturity level of language models is now at a stage in which many companies rely on them to solve various tasks. However, while research has shown how biased and harmful these models are, systematic ways of integrating social bias tests into development pipelines are still lacking. This short paper suggests how to use these verification techniques in development pipelines. We take inspiration from software testing and suggest addressing social bias evaluation as software testing. We hope to open a discussion on the best methodologies to handle social bias testing in language models.</abstract>
    <identifier type="citekey">nozza-etal-2022-pipelines</identifier>
    <identifier type="doi">10.18653/v1/2022.bigscience-1.6</identifier>
    <location>
      <url>https://aclanthology.org/2022.bigscience-1.6</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>68</start>
        <end>74</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pipelines for Social Bias Testing of Large Language Models
%A Nozza, Debora
%A Bianchi, Federico
%A Hovy, Dirk
%Y Fan, Angela
%Y Ilic, Suzana
%Y Wolf, Thomas
%Y Gallé, Matthias
%S Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models
%D 2022
%8 May
%I Association for Computational Linguistics
%C virtual+Dublin
%F nozza-etal-2022-pipelines
%X The maturity level of language models is now at a stage in which many companies rely on them to solve various tasks. However, while research has shown how biased and harmful these models are, systematic ways of integrating social bias tests into development pipelines are still lacking. This short paper suggests how to use these verification techniques in development pipelines. We take inspiration from software testing and suggest addressing social bias evaluation as software testing. We hope to open a discussion on the best methodologies to handle social bias testing in language models.
%R 10.18653/v1/2022.bigscience-1.6
%U https://aclanthology.org/2022.bigscience-1.6
%U https://doi.org/10.18653/v1/2022.bigscience-1.6
%P 68-74
Markdown (Informal)

[Pipelines for Social Bias Testing of Large Language Models](https://aclanthology.org/2022.bigscience-1.6) (Nozza et al., BigScience 2022)

ACL

Debora Nozza, Federico Bianchi, and Dirk Hovy. 2022. Pipelines for Social Bias Testing of Large Language Models. In Proceedings of BigScience Episode #5 – Workshop on Challenges & Perspectives in Creating Large Language Models, pages 68–74, virtual+Dublin. Association for Computational Linguistics.
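
As a rough illustration of the abstract's framing — treating social bias evaluation like software testing — a unit-test-style bias check for a masked language model might look like the sketch below. The model choice, templates, pronoun-gap metric, and threshold are illustrative assumptions, not taken from the paper.

```python
# Illustrative sketch only: a bias check written as a unit test, in the
# spirit of the paper's software-testing framing. The model, templates,
# metric, and threshold are assumptions for illustration, not the
# paper's own method.
from transformers import pipeline

# Sentence templates with a masked pronoun slot (hand-picked examples).
TEMPLATES = [
    "The doctor said [MASK] would be late.",
    "The nurse said [MASK] would be late.",
]
MAX_GAP = 0.4  # tolerated |P("he") - P("she")| gap; an arbitrary choice


def pronoun_gap(unmasker, template: str) -> float:
    """Absolute probability gap between 'he' and 'she' at the mask."""
    predictions = unmasker(template, top_k=100)
    scores = {p["token_str"]: p["score"] for p in predictions}
    return abs(scores.get("he", 0.0) - scores.get("she", 0.0))


def test_pronoun_gap_stays_below_threshold():
    # Runs like any other test in a CI pipeline: a bias regression
    # fails the build the same way a functional bug would.
    unmasker = pipeline("fill-mask", model="bert-base-uncased")
    for template in TEMPLATES:
        gap = pronoun_gap(unmasker, template)
        assert gap < MAX_GAP, f"pronoun gap {gap:.2f} too large for: {template!r}"
```

Run with pytest, such a check can gate a model release alongside functional tests; in practice one would draw on much larger template sets and established bias benchmarks rather than two hand-written sentences.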