% BibTeX record for the conference paper at
% https://aclanthology.org/2025.emnlp-main.1250/ (cite key: singh-etal-2025-tfdp).
% Acronyms in the title are brace-protected so sentence-casing styles keep them.
% NOTE(review): `address` looks like the conference location rather than the
% publisher's city -- confirm before mapping it to a biblatex `venue`/`location`.
@inproceedings{singh-etal-2025-tfdp,
  title     = {{TFDP}: Token-Efficient Disparity Audits for Autoregressive {LLMs} via Single-Token Masked Evaluation},
  author    = {Singh, Inderjeet and
               Srinivasan, Ramya and
               Vainshtein, Roman and
               Kojima, Hisashi},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-main.1250/},
  pages     = {24609--24626},
  isbn      = {979-8-89176-332-6},
  abstract  = {Auditing autoregressive Large Language Models (LLMs) for disparities is often impeded by high token costs and limited precision. We introduce Token-Focused Disparity Probing (TFDP), a novel methodology overcoming these challenges by adapting single-token masked prediction to autoregressive architectures via targeted token querying. Disparities between minimally contrastive sentence pairs are quantified through a multi-scale semantic alignment score that integrates sentence, local-context, and token embeddings with adaptive weighting. We propose three disparity metrics: Preference Score ($\mathcal{PS}$), Prediction Set Divergence ($\mathcal{PSD}$), and Weighted Final Score ($\mathcal{WFS}$), for comprehensive assessment. Evaluated on our customized Proverbs Disparity Dataset (PDD) with controlled attribute toggles (e.g., gender bias, misinformation susceptibility), TFDP precisely detects disparities while achieving up to 42 times fewer output tokens than minimal n-token continuations, offering a scalable tool for responsible LLM evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for cite key singh-etal-2025-tfdp: paper title and authors,
     the host proceedings (relatedItem type="host") with its editors, publisher,
     place and ISBN, then the abstract, landing URL and page extent. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="singh-etal-2025-tfdp">
    <titleInfo>
      <title>TFDP: Token-Efficient Disparity Audits for Autoregressive LLMs via Single-Token Masked Evaluation</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Inderjeet</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ramya</namePart>
      <namePart type="family">Srinivasan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roman</namePart>
      <namePart type="family">Vainshtein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hisashi</namePart>
      <namePart type="family">Kojima</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-332-6</identifier>
    </relatedItem>
    <!-- Metric symbols in the abstract are written as LaTeX math ($\mathcal{...}$). -->
    <abstract>Auditing autoregressive Large Language Models (LLMs) for disparities is often impeded by high token costs and limited precision. We introduce Token-Focused Disparity Probing (TFDP), a novel methodology overcoming these challenges by adapting single-token masked prediction to autoregressive architectures via targeted token querying. Disparities between minimally contrastive sentence pairs are quantified through a multi-scale semantic alignment score that integrates sentence, local-context, and token embeddings with adaptive weighting. We propose three disparity metrics: Preference Score ($\mathcal{PS}$), Prediction Set Divergence ($\mathcal{PSD}$), and Weighted Final Score ($\mathcal{WFS}$), for comprehensive assessment. Evaluated on our customized Proverbs Disparity Dataset (PDD) with controlled attribute toggles (e.g., gender bias, misinformation susceptibility), TFDP precisely detects disparities while achieving up to 42 times fewer output tokens than minimal n-token continuations, offering a scalable tool for responsible LLM evaluation.</abstract>
    <identifier type="citekey">singh-etal-2025-tfdp</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-main.1250/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>24609</start>
        <end>24626</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T TFDP: Token-Efficient Disparity Audits for Autoregressive LLMs via Single-Token Masked Evaluation
%A Singh, Inderjeet
%A Srinivasan, Ramya
%A Vainshtein, Roman
%A Kojima, Hisashi
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F singh-etal-2025-tfdp
%X Auditing autoregressive Large Language Models (LLMs) for disparities is often impeded by high token costs and limited precision. We introduce Token-Focused Disparity Probing (TFDP), a novel methodology overcoming these challenges by adapting single-token masked prediction to autoregressive architectures via targeted token querying. Disparities between minimally contrastive sentence pairs are quantified through a multi-scale semantic alignment score that integrates sentence, local-context, and token embeddings with adaptive weighting. We propose three disparity metrics: Preference Score ($\mathcal{PS}$), Prediction Set Divergence ($\mathcal{PSD}$), and Weighted Final Score ($\mathcal{WFS}$), for comprehensive assessment. Evaluated on our customized Proverbs Disparity Dataset (PDD) with controlled attribute toggles (e.g., gender bias, misinformation susceptibility), TFDP precisely detects disparities while achieving up to 42 times fewer output tokens than minimal n-token continuations, offering a scalable tool for responsible LLM evaluation.
%U https://aclanthology.org/2025.emnlp-main.1250/
%P 24609-24626
Markdown (Informal)
[TFDP: Token-Efficient Disparity Audits for Autoregressive LLMs via Single-Token Masked Evaluation](https://aclanthology.org/2025.emnlp-main.1250/) (Singh et al., EMNLP 2025)
ACL