% Conference paper record; values are unchanged, only layout and delimiters are normalised.
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city; confirm this is what the target bibliography style expects.
@inproceedings{kuzmin-etal-2025-inference,
  title     = {Inference-Time Selective Debiasing to Enhance Fairness in Text Classification Models},
  author    = {Kuzmin, Gleb and
               Yadav, Neemesh and
               Smirnov, Ivan and
               Baldwin, Timothy and
               Shelmanov, Artem},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.naacl-short.9/},
  doi       = {10.18653/v1/2025.naacl-short.9},
  pages     = {95--107},
  isbn      = {979-8-89176-190-2},
  abstract  = {We propose selective debiasing {--} an inference-time safety mechanism designed to enhance the overall model quality in terms of prediction performance and fairness, especially in scenarios where retraining the model is impractical. The method draws inspiration from selective classification, where at inference time, predictions with low quality, as indicated by their uncertainty scores, are discarded. In our approach, we identify the potentially biased model predictions and, instead of discarding them, we remove bias from these predictions using LEACE {--} a post-processing debiasing method. To select problematic predictions, we propose a bias quantification approach based on KL divergence, which achieves better results than standard uncertainty quantification methods. Experiments on text classification datasets with encoder-based classification models demonstrate that selective debiasing helps to reduce the performance gap between post-processing methods and debiasing techniques from the at-training and pre-processing categories.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record; the host proceedings is nested in relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kuzmin-etal-2025-inference">
    <titleInfo>
      <title>Inference-Time Selective Debiasing to Enhance Fairness in Text Classification Models</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Gleb</namePart>
      <namePart type="family">Kuzmin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Neemesh</namePart>
      <namePart type="family">Yadav</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ivan</namePart>
      <namePart type="family">Smirnov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Timothy</namePart>
      <namePart type="family">Baldwin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Artem</namePart>
      <namePart type="family">Shelmanov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-190-2</identifier>
    </relatedItem>
    <abstract>We propose selective debiasing – an inference-time safety mechanism designed to enhance the overall model quality in terms of prediction performance and fairness, especially in scenarios where retraining the model is impractical. The method draws inspiration from selective classification, where at inference time, predictions with low quality, as indicated by their uncertainty scores, are discarded. In our approach, we identify the potentially biased model predictions and, instead of discarding them, we remove bias from these predictions using LEACE – a post-processing debiasing method. To select problematic predictions, we propose a bias quantification approach based on KL divergence, which achieves better results than standard uncertainty quantification methods. Experiments on text classification datasets with encoder-based classification models demonstrate that selective debiasing helps to reduce the performance gap between post-processing methods and debiasing techniques from the at-training and pre-processing categories.</abstract>
    <!-- Identifiers and landing page for the article itself -->
    <identifier type="citekey">kuzmin-etal-2025-inference</identifier>
    <identifier type="doi">10.18653/v1/2025.naacl-short.9</identifier>
    <location>
      <url>https://aclanthology.org/2025.naacl-short.9/</url>
    </location>
    <!-- Position of the article within the host volume -->
    <part>
      <date>2025-04</date>
      <extent unit="page">
        <start>95</start>
        <end>107</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Inference-Time Selective Debiasing to Enhance Fairness in Text Classification Models
%A Kuzmin, Gleb
%A Yadav, Neemesh
%A Smirnov, Ivan
%A Baldwin, Timothy
%A Shelmanov, Artem
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-190-2
%F kuzmin-etal-2025-inference
%X We propose selective debiasing – an inference-time safety mechanism designed to enhance the overall model quality in terms of prediction performance and fairness, especially in scenarios where retraining the model is impractical. The method draws inspiration from selective classification, where at inference time, predictions with low quality, as indicated by their uncertainty scores, are discarded. In our approach, we identify the potentially biased model predictions and, instead of discarding them, we remove bias from these predictions using LEACE – a post-processing debiasing method. To select problematic predictions, we propose a bias quantification approach based on KL divergence, which achieves better results than standard uncertainty quantification methods. Experiments on text classification datasets with encoder-based classification models demonstrate that selective debiasing helps to reduce the performance gap between post-processing methods and debiasing techniques from the at-training and pre-processing categories.
%R 10.18653/v1/2025.naacl-short.9
%U https://aclanthology.org/2025.naacl-short.9/
%U https://doi.org/10.18653/v1/2025.naacl-short.9
%P 95-107
Markdown (Informal)
[Inference-Time Selective Debiasing to Enhance Fairness in Text Classification Models](https://aclanthology.org/2025.naacl-short.9/) (Kuzmin et al., NAACL 2025)
ACL
- Gleb Kuzmin, Neemesh Yadav, Ivan Smirnov, Timothy Baldwin, and Artem Shelmanov. 2025. Inference-Time Selective Debiasing to Enhance Fairness in Text Classification Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers), pages 95–107, Albuquerque, New Mexico. Association for Computational Linguistics.