@comment{
  Conference-paper record for the work at the address given in the url field.
  The abstract field is kept for reference; standard styles ignore unknown fields.
  NOTE(review): address holds the conference venue, not the publisher city; confirm this is intended.
}
@inproceedings{d-erasmo-etal-2026-statistical,
  title     = {Statistical Foundations of {DIME}: Risk Estimation for Practical Index Selection},
  author    = {D'Erasmo, Giulio and
               Campagnano, Cesare and
               Mallia, Antonio and
               Brutti, Pierpaolo and
               Tonellotto, Nicola and
               Silvestri, Fabrizio},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 2: Short Papers)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eacl-short.51/},
  pages     = {722--730},
  isbn      = {979-8-89176-381-4},
  abstract  = {High-dimensional dense embeddings have become central to modern Information Retrieval, but many dimensions are noisy or redundant. The recently proposed DIME (Dimension IMportance Estimation) provides query-dependent scores to identify informative components of embeddings. DIME relies on a costly grid search to select a priori a dimensionality for all the query corpus's embeddings. Our work provides a statistically grounded criterion that directly identifies the optimal set of dimensions for each query at inference time. Experiments confirm that this approach improves retrieval effectiveness and reduces embedding size by an average of 50\% across different models and datasets at inference time.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record for the paper; the proceedings volume that contains it is nested below as the host relatedItem. -->
  <mods ID="d-erasmo-etal-2026-statistical">
    <titleInfo>
      <title>Statistical Foundations of DIME: Risk Estimation for Practical Index Selection</title>
    </titleInfo>
    <!-- Paper authors. -->
    <name type="personal">
      <namePart type="given">Giulio</namePart>
      <namePart type="family">D’Erasmo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cesare</namePart>
      <namePart type="family">Campagnano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antonio</namePart>
      <namePart type="family">Mallia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pierpaolo</namePart>
      <namePart type="family">Brutti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nicola</namePart>
      <namePart type="family">Tonellotto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fabrizio</namePart>
      <namePart type="family">Silvestri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-381-4</identifier>
    </relatedItem>
    <abstract>High-dimensional dense embeddings have become central to modern Information Retrieval, but many dimensions are noisy or redundant. The recently proposed DIME (Dimension IMportance Estimation) provides query-dependent scores to identify informative components of embeddings. DIME relies on a costly grid search to select a priori a dimensionality for all the query corpus’s embeddings. Our work provides a statistically grounded criterion that directly identifies the optimal set of dimensions for each query at inference time. Experiments confirm that this approach improves retrieval effectiveness and reduces embedding size by an average of 50% across different models and datasets at inference time.</abstract>
    <identifier type="citekey">d-erasmo-etal-2026-statistical</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-short.51/</url>
    </location>
    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>722</start>
        <end>730</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Statistical Foundations of DIME: Risk Estimation for Practical Index Selection
%A D’Erasmo, Giulio
%A Campagnano, Cesare
%A Mallia, Antonio
%A Brutti, Pierpaolo
%A Tonellotto, Nicola
%A Silvestri, Fabrizio
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-381-4
%F d-erasmo-etal-2026-statistical
%X High-dimensional dense embeddings have become central to modern Information Retrieval, but many dimensions are noisy or redundant. The recently proposed DIME (Dimension IMportance Estimation) provides query-dependent scores to identify informative components of embeddings. DIME relies on a costly grid search to select a priori a dimensionality for all the query corpus’s embeddings. Our work provides a statistically grounded criterion that directly identifies the optimal set of dimensions for each query at inference time. Experiments confirm that this approach improves retrieval effectiveness and reduces embedding size by an average of 50% across different models and datasets at inference time.
%U https://aclanthology.org/2026.eacl-short.51/
%P 722-730
Markdown (Informal)
[Statistical Foundations of DIME: Risk Estimation for Practical Index Selection](https://aclanthology.org/2026.eacl-short.51/) (D'Erasmo et al., EACL 2026)
ACL
- Giulio D'Erasmo, Cesare Campagnano, Antonio Mallia, Pierpaolo Brutti, Nicola Tonellotto, and Fabrizio Silvestri. 2026. Statistical Foundations of DIME: Risk Estimation for Practical Index Selection. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 2: Short Papers), pages 722–730, Rabat, Morocco. Association for Computational Linguistics.