@inproceedings{saranathan-etal-2025-sublime,
title = "{S}ub{LIME}: Subset Selection via Rank Correlation Prediction for Data-Efficient {LLM} Evaluation",
author = "Saranathan, Gayathri and
Xu, Cong and
Alam, Mahammad Parwez and
Kumar, Tarun and
Foltin, Martin and
Wong, Soon Yee and
Bhattacharya, Suparna",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1477/",
doi = "10.18653/v1/2025.acl-long.1477",
pages = "30572--30593",
ISBN = "979-8-89176-251-0",
abstract = "The rapid expansion of Large Language Models (LLMs) and natural language processing datasets has made exhaustive benchmark evaluations computationally prohibitive. Inspired by high-stakes competitions like the International Mathematical Olympiad-where a few well-chosen problems suffice to differentiate top performers{---}we present SubLIME, which reduces evaluation costs by 80{\%} to 99{\%} while preserving ranking fidelity. It trains a Rank Correlation Prediction (RCP) model that combines limited performance data from only 5-20 anchor LLMs with dataset intrinsic metrics - Difficulty, Quality, and Distributional Dispersion-to predict how closely a candidate subset reflects full-benchmark rankings. Guided by these predictions, SubLIME selects a ``winning'' subset (1-20{\%} of full set data) for evaluating new LLMs, preserving global rankings significant better than other data-efficient methods across ten diverse benchmarks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saranathan-etal-2025-sublime">
<titleInfo>
<title>SubLIME: Subset Selection via Rank Correlation Prediction for Data-Efficient LLM Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gayathri</namePart>
<namePart type="family">Saranathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cong</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahammad</namePart>
<namePart type="given">Parwez</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tarun</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Foltin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soon</namePart>
<namePart type="given">Yee</namePart>
<namePart type="family">Wong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suparna</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>The rapid expansion of Large Language Models (LLMs) and natural language processing datasets has made exhaustive benchmark evaluations computationally prohibitive. Inspired by high-stakes competitions like the International Mathematical Olympiad, where a few well-chosen problems suffice to differentiate top performers, we present SubLIME, which reduces evaluation costs by 80% to 99% while preserving ranking fidelity. It trains a Rank Correlation Prediction (RCP) model that combines limited performance data from only 5-20 anchor LLMs with dataset-intrinsic metrics (Difficulty, Quality, and Distributional Dispersion) to predict how closely a candidate subset reflects full-benchmark rankings. Guided by these predictions, SubLIME selects a “winning” subset (1-20% of the full dataset) for evaluating new LLMs, preserving global rankings significantly better than other data-efficient methods across ten diverse benchmarks.</abstract>
<identifier type="citekey">saranathan-etal-2025-sublime</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1477</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1477/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>30572</start>
<end>30593</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SubLIME: Subset Selection via Rank Correlation Prediction for Data-Efficient LLM Evaluation
%A Saranathan, Gayathri
%A Xu, Cong
%A Alam, Mahammad Parwez
%A Kumar, Tarun
%A Foltin, Martin
%A Wong, Soon Yee
%A Bhattacharya, Suparna
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F saranathan-etal-2025-sublime
%X The rapid expansion of Large Language Models (LLMs) and natural language processing datasets has made exhaustive benchmark evaluations computationally prohibitive. Inspired by high-stakes competitions like the International Mathematical Olympiad, where a few well-chosen problems suffice to differentiate top performers, we present SubLIME, which reduces evaluation costs by 80% to 99% while preserving ranking fidelity. It trains a Rank Correlation Prediction (RCP) model that combines limited performance data from only 5-20 anchor LLMs with dataset-intrinsic metrics (Difficulty, Quality, and Distributional Dispersion) to predict how closely a candidate subset reflects full-benchmark rankings. Guided by these predictions, SubLIME selects a “winning” subset (1-20% of the full dataset) for evaluating new LLMs, preserving global rankings significantly better than other data-efficient methods across ten diverse benchmarks.
%R 10.18653/v1/2025.acl-long.1477
%U https://aclanthology.org/2025.acl-long.1477/
%U https://doi.org/10.18653/v1/2025.acl-long.1477
%P 30572-30593
Markdown (Informal)
[SubLIME: Subset Selection via Rank Correlation Prediction for Data-Efficient LLM Evaluation](https://aclanthology.org/2025.acl-long.1477/) (Saranathan et al., ACL 2025)
ACL
Gayathri Saranathan, Cong Xu, Mahammad Parwez Alam, Tarun Kumar, Martin Foltin, Soon Yee Wong, and Suparna Bhattacharya. 2025. SubLIME: Subset Selection via Rank Correlation Prediction for Data-Efficient LLM Evaluation. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 30572–30593, Vienna, Austria. Association for Computational Linguistics.
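
As a rough illustration of the criterion the abstract describes (how closely rankings on a candidate subset track full-benchmark rankings), the sketch below scores candidate subsets by the Spearman rank correlation between subset-based and full-benchmark LLM rankings and keeps the best one. It is not the authors' RCP model or released code; the score matrix, subset size, and random-candidate loop are hypothetical stand-ins, and SubLIME's actual selection is guided by a learned predictor that also uses dataset-intrinsic metrics.

```python
# Illustrative sketch only: rank candidate subsets of a benchmark by how well
# per-subset scores reproduce full-benchmark LLM rankings (Spearman's rho).
# anchor_scores (anchor LLMs x benchmark items) is a made-up stand-in.
import numpy as np
from scipy.stats import spearmanr

rng = np.random.default_rng(0)
n_anchors, n_items = 10, 1000                       # e.g. 5-20 anchor LLMs, full benchmark size
anchor_scores = rng.random((n_anchors, n_items))    # per-item scores (hypothetical)

full_ranking = anchor_scores.mean(axis=1)           # full-benchmark score per anchor LLM

def subset_rank_correlation(item_idx):
    """Spearman correlation between subset-based and full-benchmark LLM rankings."""
    subset_ranking = anchor_scores[:, item_idx].mean(axis=1)
    rho, _ = spearmanr(subset_ranking, full_ranking)
    return rho

# Pick the best of a few random 5% subsets (stand-in for SubLIME's guided selection).
subset_size = int(0.05 * n_items)
candidates = [rng.choice(n_items, subset_size, replace=False) for _ in range(50)]
best = max(candidates, key=subset_rank_correlation)
print(f"best candidate subset rho = {subset_rank_correlation(best):.3f}")
```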