@inproceedings{glenn-etal-2022-viability,
title = "The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias",
author = "Glenn, Parker and
Jacobs, Cassandra L. and
Thielk, Marvin and
Chu, Yi",
editor = "Abercrombie, Gavin and
Basile, Valerio and
Tonelli, Sara and
Rieser, Verena and
Uma, Alexandra",
booktitle = "Proceedings of the 1st Workshop on Perspectivist Approaches to NLP @LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.nlperspectives-1.5",
pages = "32--36",
abstract = "Annotating workplace bias in text is a noisy and subjective task. In encoding the inherently continuous nature of bias, aggregated binary classifications do not suffice. Best-worst scaling (BWS) offers a framework to obtain real-valued scores through a series of comparative evaluations, but it is often impractical to deploy to traditional annotation pipelines within industry. We present analyses of a small-scale bias dataset, jointly annotated with categorical annotations and BWS annotations. We show that there is a strong correlation between observed agreement and BWS score (Spearman{'}s r=0.72). We identify several shortcomings of BWS relative to traditional categorical annotation: (1) When compared to categorical annotation, we estimate BWS takes approximately 4.5x longer to complete; (2) BWS does not scale well to large annotation tasks with sparse target phenomena; (3) The high correlation between BWS and the traditional task shows that the benefits of BWS can be recovered from a simple categorically annotated, non-aggregated dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="glenn-etal-2022-viability">
<titleInfo>
<title>The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parker</namePart>
<namePart type="family">Glenn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cassandra</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Jacobs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marvin</namePart>
<namePart type="family">Thielk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Perspectivist Approaches to NLP @LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gavin</namePart>
<namePart type="family">Abercrombie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verena</namePart>
<namePart type="family">Rieser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandra</namePart>
<namePart type="family">Uma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Annotating workplace bias in text is a noisy and subjective task. In encoding the inherently continuous nature of bias, aggregated binary classifications do not suffice. Best-worst scaling (BWS) offers a framework to obtain real-valued scores through a series of comparative evaluations, but it is often impractical to deploy to traditional annotation pipelines within industry. We present analyses of a small-scale bias dataset, jointly annotated with categorical annotations and BWS annotations. We show that there is a strong correlation between observed agreement and BWS score (Spearman’s r=0.72). We identify several shortcomings of BWS relative to traditional categorical annotation: (1) When compared to categorical annotation, we estimate BWS takes approximately 4.5x longer to complete; (2) BWS does not scale well to large annotation tasks with sparse target phenomena; (3) The high correlation between BWS and the traditional task shows that the benefits of BWS can be recovered from a simple categorically annotated, non-aggregated dataset.</abstract>
<identifier type="citekey">glenn-etal-2022-viability</identifier>
<location>
<url>https://aclanthology.org/2022.nlperspectives-1.5</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>32</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias
%A Glenn, Parker
%A Jacobs, Cassandra L.
%A Thielk, Marvin
%A Chu, Yi
%Y Abercrombie, Gavin
%Y Basile, Valerio
%Y Tonelli, Sara
%Y Rieser, Verena
%Y Uma, Alexandra
%S Proceedings of the 1st Workshop on Perspectivist Approaches to NLP @LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F glenn-etal-2022-viability
%X Annotating workplace bias in text is a noisy and subjective task. In encoding the inherently continuous nature of bias, aggregated binary classifications do not suffice. Best-worst scaling (BWS) offers a framework to obtain real-valued scores through a series of comparative evaluations, but it is often impractical to deploy to traditional annotation pipelines within industry. We present analyses of a small-scale bias dataset, jointly annotated with categorical annotations and BWS annotations. We show that there is a strong correlation between observed agreement and BWS score (Spearman’s r=0.72). We identify several shortcomings of BWS relative to traditional categorical annotation: (1) When compared to categorical annotation, we estimate BWS takes approximately 4.5x longer to complete; (2) BWS does not scale well to large annotation tasks with sparse target phenomena; (3) The high correlation between BWS and the traditional task shows that the benefits of BWS can be recovered from a simple categorically annotated, non-aggregated dataset.
%U https://aclanthology.org/2022.nlperspectives-1.5
%P 32-36
Markdown (Informal)
[The Viability of Best-worst Scaling and Categorical Data Label Annotation Tasks in Detecting Implicit Bias](https://aclanthology.org/2022.nlperspectives-1.5) (Glenn et al., NLPerspectives 2022)
ACL