@inproceedings{kang-etal-2025-trial,
title = "{TRIAL}: Token Relations and Importance Aware Late-interaction for Accurate Text Retrieval",
author = "Kang, Hyukkyu and
Kim, Injung and
Han, Wook-Shin",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.854/",
pages = "16875--16888",
ISBN = "979-8-89176-332-6",
abstract = "Late-interaction based multi-vector retrieval systems have greatly advanced the field of information retrieval by enabling fast and accurate search over millions of documents. However, these systems rely on a naive summation of token-level similarity scores which often leads to inaccurate relevance estimation caused by the tokenization of semantic units (e.g., words and phrases) and the influence of low-content words (e.g., articles and prepositions). To address these challenges, we propose **TRIAL**: **T**oken **R**elations and **I**mportance **A**ware **L**ate-interaction, which enhances late interaction by explicitly modeling token relations and token importance in relevance scoring. Extensive experiments on three widely used benchmarks show that TRIAL achieves state-of-the-art accuracy, with an nDCG@10 of 46.3 on MSMARCO (in-domain), and average nDCG@10 scores of 51.09 and 72.15 on BEIR and LoTTE Search (out-of-domain), respectively. With superior accuracy, TRIAL maintains competitive retrieval speed compared to existing late-interaction methods, making it a practical solution for large-scale text retrieval."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kang-etal-2025-trial">
<titleInfo>
<title>TRIAL: Token Relations and Importance Aware Late-interaction for Accurate Text Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hyukkyu</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Injung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wook-Shin</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Late-interaction based multi-vector retrieval systems have greatly advanced the field of information retrieval by enabling fast and accurate search over millions of documents. However, these systems rely on a naive summation of token-level similarity scores which often leads to inaccurate relevance estimation caused by the tokenization of semantic units (e.g., words and phrases) and the influence of low-content words (e.g., articles and prepositions). To address these challenges, we propose **TRIAL**: **T**oken **R**elations and **I**mportance **A**ware **L**ate-interaction, which enhances late interaction by explicitly modeling token relations and token importance in relevance scoring. Extensive experiments on three widely used benchmarks show that TRIAL achieves state-of-the-art accuracy, with an nDCG@10 of 46.3 on MSMARCO (in-domain), and average nDCG@10 scores of 51.09 and 72.15 on BEIR and LoTTE Search (out-of-domain), respectively. With superior accuracy, TRIAL maintains competitive retrieval speed compared to existing late-interaction methods, making it a practical solution for large-scale text retrieval.</abstract>
<identifier type="citekey">kang-etal-2025-trial</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.854/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>16875</start>
<end>16888</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TRIAL: Token Relations and Importance Aware Late-interaction for Accurate Text Retrieval
%A Kang, Hyukkyu
%A Kim, Injung
%A Han, Wook-Shin
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F kang-etal-2025-trial
%X Late-interaction based multi-vector retrieval systems have greatly advanced the field of information retrieval by enabling fast and accurate search over millions of documents. However, these systems rely on a naive summation of token-level similarity scores which often leads to inaccurate relevance estimation caused by the tokenization of semantic units (e.g., words and phrases) and the influence of low-content words (e.g., articles and prepositions). To address these challenges, we propose **TRIAL**: **T**oken **R**elations and **I**mportance **A**ware **L**ate-interaction, which enhances late interaction by explicitly modeling token relations and token importance in relevance scoring. Extensive experiments on three widely used benchmarks show that TRIAL achieves state-of-the-art accuracy, with an nDCG@10 of 46.3 on MSMARCO (in-domain), and average nDCG@10 scores of 51.09 and 72.15 on BEIR and LoTTE Search (out-of-domain), respectively. With superior accuracy, TRIAL maintains competitive retrieval speed compared to existing late-interaction methods, making it a practical solution for large-scale text retrieval.
%U https://aclanthology.org/2025.emnlp-main.854/
%P 16875-16888
Markdown (Informal)
[TRIAL: Token Relations and Importance Aware Late-interaction for Accurate Text Retrieval](https://aclanthology.org/2025.emnlp-main.854/) (Kang et al., EMNLP 2025)
ACL