@inproceedings{lassen-etal-2023-detecting,
title = "Detecting intersectionality in {NER} models: A data-driven approach",
author = "Lassen, Ida Marie S. and
Almasi, Mina and
Enevoldsen, Kenneth and
Kristensen-McLachlan, Ross Deans",
editor = "Degaetano-Ortlieb, Stefania and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.latechclfl-1.13",
doi = "10.18653/v1/2023.latechclfl-1.13",
pages = "116--127",
abstract = "The presence of bias is a pressing concern for both engineers and users of language technology. What is less clear is how exactly bias can be measured, so as to rank models relative to the biases they display. Using an innovative experimental method involving data augmentation, we measure the effect of intersectional biases in Danish models used for Name Entity Recognition (NER). We quantify differences in representational biases, understood as a systematic difference in error or what is called error disparity. Our analysis includes both gender and ethnicity to illustrate the effect of multiple dimensions of bias, as well as experiments which look to move beyond a narrowly binary analysis of gender. We show that all contemporary Danish NER models perform systematically worse on non-binary and minority ethnic names, while not showing significant differences for typically Danish names. Our data augmentation technique can be applied on other languages to test for biases which might be relevant for researchers applying NER models to the study of cultural heritage data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lassen-etal-2023-detecting">
<titleInfo>
<title>Detecting intersectionality in NER models: A data-driven approach</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ida</namePart>
<namePart type="given">Marie</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Lassen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mina</namePart>
<namePart type="family">Almasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Enevoldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ross</namePart>
<namePart type="given">Deans</namePart>
<namePart type="family">Kristensen-McLachlan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The presence of bias is a pressing concern for both engineers and users of language technology. What is less clear is how exactly bias can be measured, so as to rank models relative to the biases they display. Using an innovative experimental method involving data augmentation, we measure the effect of intersectional biases in Danish models used for Name Entity Recognition (NER). We quantify differences in representational biases, understood as a systematic difference in error or what is called error disparity. Our analysis includes both gender and ethnicity to illustrate the effect of multiple dimensions of bias, as well as experiments which look to move beyond a narrowly binary analysis of gender. We show that all contemporary Danish NER models perform systematically worse on non-binary and minority ethnic names, while not showing significant differences for typically Danish names. Our data augmentation technique can be applied on other languages to test for biases which might be relevant for researchers applying NER models to the study of cultural heritage data.</abstract>
<identifier type="citekey">lassen-etal-2023-detecting</identifier>
<identifier type="doi">10.18653/v1/2023.latechclfl-1.13</identifier>
<location>
<url>https://aclanthology.org/2023.latechclfl-1.13</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>116</start>
<end>127</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting intersectionality in NER models: A data-driven approach
%A Lassen, Ida Marie S.
%A Almasi, Mina
%A Enevoldsen, Kenneth
%A Kristensen-McLachlan, Ross Deans
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F lassen-etal-2023-detecting
%X The presence of bias is a pressing concern for both engineers and users of language technology. What is less clear is how exactly bias can be measured, so as to rank models relative to the biases they display. Using an innovative experimental method involving data augmentation, we measure the effect of intersectional biases in Danish models used for Name Entity Recognition (NER). We quantify differences in representational biases, understood as a systematic difference in error or what is called error disparity. Our analysis includes both gender and ethnicity to illustrate the effect of multiple dimensions of bias, as well as experiments which look to move beyond a narrowly binary analysis of gender. We show that all contemporary Danish NER models perform systematically worse on non-binary and minority ethnic names, while not showing significant differences for typically Danish names. Our data augmentation technique can be applied on other languages to test for biases which might be relevant for researchers applying NER models to the study of cultural heritage data.
%R 10.18653/v1/2023.latechclfl-1.13
%U https://aclanthology.org/2023.latechclfl-1.13
%U https://doi.org/10.18653/v1/2023.latechclfl-1.13
%P 116-127
Markdown (Informal)
[Detecting intersectionality in NER models: A data-driven approach](https://aclanthology.org/2023.latechclfl-1.13) (Lassen et al., LaTeCHCLfL 2023)
ACL
- Ida Marie S. Lassen, Mina Almasi, Kenneth Enevoldsen, and Ross Deans Kristensen-McLachlan. 2023. Detecting intersectionality in NER models: A data-driven approach. In Proceedings of the 7th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 116–127, Dubrovnik, Croatia. Association for Computational Linguistics.