% Conference paper in the *SEM 2024 proceedings (pages 372--384).
@inproceedings{ranaldi-etal-2024-trip,
  author    = {Ranaldi, Leonardo and
               Ruzzetti, Elena Sofia and
               Venditti, Davide and
               Onorati, Dario and
               Zanzotto, Fabio Massimo},
  title     = {A Trip Towards Fairness: Bias and De-Biasing in Large Language Models},
  editor    = {Bollegala, Danushka and
               Shwartz, Vered},
  booktitle = {Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)},
  publisher = {Association for Computational Linguistics},
  address   = {Mexico City, Mexico},
  month     = jun,
  year      = {2024},
  pages     = {372--384},
  doi       = {10.18653/v1/2024.starsem-1.30},
  url       = {https://aclanthology.org/2024.starsem-1.30},
  abstract  = {Cheap-to-Build Very Large-Language Models (CtB-LLMs) with affordable training are emerging as the next big revolution in natural language processing and understanding. These CtB-LLMs are democratizing access to trainable Very Large-Language Models (VLLMs) and, thus, may represent the building blocks of many NLP systems solving downstream tasks. Hence, a little or a large bias in CtB-LLMs may cause huge harm. In this paper, we performed a large investigation of the bias of three families of CtB-LLMs, and we showed that debiasing techniques are effective and usable. Indeed, according to current tests, the LLaMA and the OPT families have an important bias in gender, race, religion, and profession. In contrast to the analysis for other LMMs, we discovered that bias depends not on the number of parameters but on the perplexity. Finally, the debiasing of OPT using LORA reduces bias up to 4.12 points in the normalized stereotype score.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey ranaldi-etal-2024-trip: the paper's title, authors,
     issue date, abstract, identifiers and page extent, with the host
     proceedings (title, editors, publisher, place) as a relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ranaldi-etal-2024-trip">
    <titleInfo>
      <title>A Trip Towards Fairness: Bias and De-Biasing in Large Language Models</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Leonardo</namePart>
      <namePart type="family">Ranaldi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elena</namePart>
      <namePart type="given">Sofia</namePart>
      <namePart type="family">Ruzzetti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Davide</namePart>
      <namePart type="family">Venditti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dario</namePart>
      <namePart type="family">Onorati</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fabio</namePart>
      <namePart type="given">Massimo</namePart>
      <namePart type="family">Zanzotto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Danushka</namePart>
        <namePart type="family">Bollegala</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vered</namePart>
        <namePart type="family">Shwartz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Cheap-to-Build Very Large-Language Models (CtB-LLMs) with affordable training are emerging as the next big revolution in natural language processing and understanding. These CtB-LLMs are democratizing access to trainable Very Large-Language Models (VLLMs) and, thus, may represent the building blocks of many NLP systems solving downstream tasks. Hence, a little or a large bias in CtB-LLMs may cause huge harm. In this paper, we performed a large investigation of the bias of three families of CtB-LLMs, and we showed that debiasing techniques are effective and usable. Indeed, according to current tests, the LLaMA and the OPT families have an important bias in gender, race, religion, and profession. In contrast to the analysis for other LMMs, we discovered that bias depends not on the number of parameters but on the perplexity. Finally, the debiasing of OPT using LORA reduces bias up to 4.12 points in the normalized stereotype score.</abstract>
    <identifier type="citekey">ranaldi-etal-2024-trip</identifier>
    <identifier type="doi">10.18653/v1/2024.starsem-1.30</identifier>
    <location>
      <url>https://aclanthology.org/2024.starsem-1.30</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>372</start>
        <end>384</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Trip Towards Fairness: Bias and De-Biasing in Large Language Models
%A Ranaldi, Leonardo
%A Ruzzetti, Elena Sofia
%A Venditti, Davide
%A Onorati, Dario
%A Zanzotto, Fabio Massimo
%Y Bollegala, Danushka
%Y Shwartz, Vered
%S Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F ranaldi-etal-2024-trip
%X Cheap-to-Build Very Large-Language Models (CtB-LLMs) with affordable training are emerging as the next big revolution in natural language processing and understanding. These CtB-LLMs are democratizing access to trainable Very Large-Language Models (VLLMs) and, thus, may represent the building blocks of many NLP systems solving downstream tasks. Hence, a little or a large bias in CtB-LLMs may cause huge harm. In this paper, we performed a large investigation of the bias of three families of CtB-LLMs, and we showed that debiasing techniques are effective and usable. Indeed, according to current tests, the LLaMA and the OPT families have an important bias in gender, race, religion, and profession. In contrast to the analysis for other LMMs, we discovered that bias depends not on the number of parameters but on the perplexity. Finally, the debiasing of OPT using LORA reduces bias up to 4.12 points in the normalized stereotype score.
%R 10.18653/v1/2024.starsem-1.30
%U https://aclanthology.org/2024.starsem-1.30
%U https://doi.org/10.18653/v1/2024.starsem-1.30
%P 372-384
Markdown (Informal)
[A Trip Towards Fairness: Bias and De-Biasing in Large Language Models](https://aclanthology.org/2024.starsem-1.30) (Ranaldi et al., *SEM 2024)
ACL
- Leonardo Ranaldi, Elena Sofia Ruzzetti, Davide Venditti, Dario Onorati, and Fabio Massimo Zanzotto. 2024. A Trip Towards Fairness: Bias and De-Biasing in Large Language Models. In Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024), pages 372–384, Mexico City, Mexico. Association for Computational Linguistics.