BibTeX
@inproceedings{suster-etal-2023-promoting,
title = "Promoting Fairness in Classification of Quality of Medical Evidence",
author = "Suster, Simon and
Baldwin, Timothy and
Verspoor, Karin",
    editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Cohen, Kevin",
booktitle = "The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bionlp-1.39",
doi = "10.18653/v1/2023.bionlp-1.39",
pages = "413--426",
abstract = "Automatically rating the quality of published research is a critical step in medical evidence synthesis. While several methods have been proposed, their algorithmic fairness has been overlooked even though significant risks may follow when such systems are deployed in biomedical contexts. In this work, we study fairness on two systems along two sensitive attributes, participant sex and medical area. In some cases, we find important inequalities, leading us to apply various debiasing methods. Upon examining an interplay of systems{'} predictive performance, fairness, as well as medically critical selective classification capabilities and calibration performance, we find that fairness can sometimes improve through debiasing, but at a cost in other performance measures.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="suster-etal-2023-promoting">
  <titleInfo>
    <title>Promoting Fairness in Classification of Quality of Medical Evidence</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Simon</namePart>
    <namePart type="family">Suster</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Timothy</namePart>
    <namePart type="family">Baldwin</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Karin</namePart>
    <namePart type="family">Verspoor</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2023-07</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Dina</namePart>
      <namePart type="family">Demner-Fushman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sophia</namePart>
      <namePart type="family">Ananiadou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kevin</namePart>
      <namePart type="family">Cohen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Toronto, Canada</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Automatically rating the quality of published research is a critical step in medical evidence synthesis. While several methods have been proposed, their algorithmic fairness has been overlooked even though significant risks may follow when such systems are deployed in biomedical contexts. In this work, we study fairness on two systems along two sensitive attributes, participant sex and medical area. In some cases, we find important inequalities, leading us to apply various debiasing methods. Upon examining an interplay of systems’ predictive performance, fairness, as well as medically critical selective classification capabilities and calibration performance, we find that fairness can sometimes improve through debiasing, but at a cost in other performance measures.</abstract>
  <identifier type="citekey">suster-etal-2023-promoting</identifier>
  <identifier type="doi">10.18653/v1/2023.bionlp-1.39</identifier>
  <location>
    <url>https://aclanthology.org/2023.bionlp-1.39</url>
  </location>
  <part>
    <date>2023-07</date>
    <extent unit="page">
      <start>413</start>
      <end>426</end>
    </extent>
  </part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Promoting Fairness in Classification of Quality of Medical Evidence
%A Suster, Simon
%A Baldwin, Timothy
%A Verspoor, Karin
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Cohen, Kevin
%S The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F suster-etal-2023-promoting
%X Automatically rating the quality of published research is a critical step in medical evidence synthesis. While several methods have been proposed, their algorithmic fairness has been overlooked even though significant risks may follow when such systems are deployed in biomedical contexts. In this work, we study fairness on two systems along two sensitive attributes, participant sex and medical area. In some cases, we find important inequalities, leading us to apply various debiasing methods. Upon examining an interplay of systems’ predictive performance, fairness, as well as medically critical selective classification capabilities and calibration performance, we find that fairness can sometimes improve through debiasing, but at a cost in other performance measures.
%R 10.18653/v1/2023.bionlp-1.39
%U https://aclanthology.org/2023.bionlp-1.39
%U https://doi.org/10.18653/v1/2023.bionlp-1.39
%P 413-426
Markdown (Informal)
[Promoting Fairness in Classification of Quality of Medical Evidence](https://aclanthology.org/2023.bionlp-1.39) (Suster et al., BioNLP 2023)
ACL
Simon Suster, Timothy Baldwin, and Karin Verspoor. 2023. [Promoting Fairness in Classification of Quality of Medical Evidence](https://aclanthology.org/2023.bionlp-1.39). In *The 22nd Workshop on Biomedical Natural Language Processing and BioNLP Shared Tasks*, pages 413–426, Toronto, Canada. Association for Computational Linguistics.