@inproceedings{eckman-etal-2025-aligning,
title = "Aligning {NLP} Models with Target Population Perspectives using {PAIR}: Population-Aligned Instance Replication",
author = "Eckman, Stephanie and
Ma, Bolei and
Kern, Christoph and
Chew, Rob and
Plank, Barbara and
Kreuter, Frauke",
editor = "Abercrombie, Gavin and
Basile, Valerio and
Frenda, Simona and
Tonelli, Sara and
Dudy, Shiran",
booktitle = "Proceedings of the 4th Workshop on Perspectivist Approaches to NLP",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nlperspectives-1.9/",
pages = "100--110",
ISBN = "979-8-89176-350-0",
abstract = "Models trained on crowdsourced annotations may not reflect population views, if those who work as annotators do not represent the broader population. In this paper, we propose PAIR: Population-Aligned Instance Replication, a post-processing method that adjusts training data to better reflect target population characteristics without collecting additional annotations. Using simulation studies on offensive language and hate speech detection with varying annotator compositions, we show that non-representative pools degrade model calibration while leaving accuracy largely unchanged. PAIR corrects these calibration problems by replicating annotations from underrepresented annotator groups to match population proportions. We conclude with recommendations for improving the representativity of training data and model performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eckman-etal-2025-aligning">
<titleInfo>
<title>Aligning NLP Models with Target Population Perspectives using PAIR: Population-Aligned Instance Replication</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Eckman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bolei</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Kern</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">Chew</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frauke</namePart>
<namePart type="family">Kreuter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Perspectivist Approaches to NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gavin</namePart>
<namePart type="family">Abercrombie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valerio</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simona</namePart>
<namePart type="family">Frenda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiran</namePart>
<namePart type="family">Dudy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-350-0</identifier>
</relatedItem>
<abstract>Models trained on crowdsourced annotations may not reflect population views, if those who work as annotators do not represent the broader population. In this paper, we propose PAIR: Population-Aligned Instance Replication, a post-processing method that adjusts training data to better reflect target population characteristics without collecting additional annotations. Using simulation studies on offensive language and hate speech detection with varying annotator compositions, we show that non-representative pools degrade model calibration while leaving accuracy largely unchanged. PAIR corrects these calibration problems by replicating annotations from underrepresented annotator groups to match population proportions. We conclude with recommendations for improving the representativity of training data and model performance.</abstract>
<identifier type="citekey">eckman-etal-2025-aligning</identifier>
<location>
<url>https://aclanthology.org/2025.nlperspectives-1.9/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>100</start>
<end>110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Aligning NLP Models with Target Population Perspectives using PAIR: Population-Aligned Instance Replication
%A Eckman, Stephanie
%A Ma, Bolei
%A Kern, Christoph
%A Chew, Rob
%A Plank, Barbara
%A Kreuter, Frauke
%Y Abercrombie, Gavin
%Y Basile, Valerio
%Y Frenda, Simona
%Y Tonelli, Sara
%Y Dudy, Shiran
%S Proceedings of the 4th Workshop on Perspectivist Approaches to NLP
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-350-0
%F eckman-etal-2025-aligning
%X Models trained on crowdsourced annotations may not reflect population views, if those who work as annotators do not represent the broader population. In this paper, we propose PAIR: Population-Aligned Instance Replication, a post-processing method that adjusts training data to better reflect target population characteristics without collecting additional annotations. Using simulation studies on offensive language and hate speech detection with varying annotator compositions, we show that non-representative pools degrade model calibration while leaving accuracy largely unchanged. PAIR corrects these calibration problems by replicating annotations from underrepresented annotator groups to match population proportions. We conclude with recommendations for improving the representativity of training data and model performance.
%U https://aclanthology.org/2025.nlperspectives-1.9/
%P 100-110
Markdown (Informal)
[Aligning NLP Models with Target Population Perspectives using PAIR: Population-Aligned Instance Replication](https://aclanthology.org/2025.nlperspectives-1.9/) (Eckman et al., NLPerspectives 2025)
ACL