@inproceedings{valentini-etal-2023-interpretability,
title = "On the Interpretability and Significance of Bias Metrics in Texts: a {PMI}-based Approach",
author = "Valentini, Francisco and
Rosati, Germ{\'a}n and
Blasi, Dami{\'a}n and
Fernandez Slezak, Diego and
Altszyler, Edgar",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-short.44",
doi = "10.18653/v1/2023.acl-short.44",
pages = "509--520",
abstract = "In recent years, word embeddings have been widely used to measure biases in texts. Even if they have proven to be effective in detecting a wide variety of biases, metrics based on word embeddings lack transparency and interpretability. We analyze an alternative PMI-based metric to quantify biases in texts. It can be expressed as a function of conditional probabilities, which provides a simple interpretation in terms of word co-occurrences. We also prove that it can be approximated by an odds ratio, which allows estimating confidence intervals and statistical significance of textual biases. This approach produces similar results to metrics based on word embeddings when capturing gender gaps of the real world embedded in large corpora.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="valentini-etal-2023-interpretability">
<titleInfo>
<title>On the Interpretability and Significance of Bias Metrics in Texts: a PMI-based Approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Valentini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Germán</namePart>
<namePart type="family">Rosati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damián</namePart>
<namePart type="family">Blasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Fernandez Slezak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edgar</namePart>
<namePart type="family">Altszyler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, word embeddings have been widely used to measure biases in texts. Even if they have proven to be effective in detecting a wide variety of biases, metrics based on word embeddings lack transparency and interpretability. We analyze an alternative PMI-based metric to quantify biases in texts. It can be expressed as a function of conditional probabilities, which provides a simple interpretation in terms of word co-occurrences. We also prove that it can be approximated by an odds ratio, which allows estimating confidence intervals and statistical significance of textual biases. This approach produces similar results to metrics based on word embeddings when capturing gender gaps of the real world embedded in large corpora.</abstract>
<identifier type="citekey">valentini-etal-2023-interpretability</identifier>
<identifier type="doi">10.18653/v1/2023.acl-short.44</identifier>
<location>
<url>https://aclanthology.org/2023.acl-short.44</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>509</start>
<end>520</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Interpretability and Significance of Bias Metrics in Texts: a PMI-based Approach
%A Valentini, Francisco
%A Rosati, Germán
%A Blasi, Damián
%A Fernandez Slezak, Diego
%A Altszyler, Edgar
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F valentini-etal-2023-interpretability
%X In recent years, word embeddings have been widely used to measure biases in texts. Even if they have proven to be effective in detecting a wide variety of biases, metrics based on word embeddings lack transparency and interpretability. We analyze an alternative PMI-based metric to quantify biases in texts. It can be expressed as a function of conditional probabilities, which provides a simple interpretation in terms of word co-occurrences. We also prove that it can be approximated by an odds ratio, which allows estimating confidence intervals and statistical significance of textual biases. This approach produces similar results to metrics based on word embeddings when capturing gender gaps of the real world embedded in large corpora.
%R 10.18653/v1/2023.acl-short.44
%U https://aclanthology.org/2023.acl-short.44
%U https://doi.org/10.18653/v1/2023.acl-short.44
%P 509-520
Markdown (Informal)
[On the Interpretability and Significance of Bias Metrics in Texts: a PMI-based Approach](https://aclanthology.org/2023.acl-short.44) (Valentini et al., ACL 2023)
ACL