@inproceedings{kang-etal-2025-pqr,
title = "{PQR}: Improving Dense Retrieval via Potential Query Modeling",
author = "Kang, Junfeng and
Li, Rui and
Liu, Qi and
Chen, Yanjiang and
Zhang, Zheng and
Jiang, Junzhe and
Yu, Heng and
Su, Yu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.660/",
doi = "10.18653/v1/2025.acl-long.660",
pages = "13455--13469",
ISBN = "979-8-89176-251-0",
abstract = "Dense retrieval has now become the mainstream paradigm in information retrieval. The core idea of dense retrieval is to align document embeddings with their corresponding query embeddings by maximizing their dot product. The current training data is quite sparse, with each document typically associated with only one or a few labeled queries. However, a single document can be retrieved by multiple different queries. Aligning a document with just one or a limited number of labeled queries results in a loss of its semantic information. In this paper, we propose a training-free Potential Query Retrieval (PQR) framework to address this issue. Specifically, we use a Gaussian mixture distribution to model all potential queries for a document, aiming to capture its comprehensive semantic information. To obtain this distribution, we introduce three sampling strategies to sample a large number of potential queries for each document and encode them into a semantic space. Using these sampled queries, we employ the Expectation-Maximization algorithm to estimate parameters of the distribution. Finally, we also propose a method to calculate similarity scores between user queries and documents under the PQR framework. Extensive experiments demonstrate the effectiveness of the proposed method."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kang-etal-2025-pqr">
<titleInfo>
<title>PQR: Improving Dense Retrieval via Potential Query Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Junfeng</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanjiang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junzhe</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Dense retrieval has now become the mainstream paradigm in information retrieval. The core idea of dense retrieval is to align document embeddings with their corresponding query embeddings by maximizing their dot product. The current training data is quite sparse, with each document typically associated with only one or a few labeled queries. However, a single document can be retrieved by multiple different queries. Aligning a document with just one or a limited number of labeled queries results in a loss of its semantic information. In this paper, we propose a training-free Potential Query Retrieval (PQR) framework to address this issue. Specifically, we use a Gaussian mixture distribution to model all potential queries for a document, aiming to capture its comprehensive semantic information. To obtain this distribution, we introduce three sampling strategies to sample a large number of potential queries for each document and encode them into a semantic space. Using these sampled queries, we employ the Expectation-Maximization algorithm to estimate parameters of the distribution. Finally, we also propose a method to calculate similarity scores between user queries and documents under the PQR framework. Extensive experiments demonstrate the effectiveness of the proposed method.</abstract>
<identifier type="citekey">kang-etal-2025-pqr</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.660</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.660/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>13455</start>
<end>13469</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PQR: Improving Dense Retrieval via Potential Query Modeling
%A Kang, Junfeng
%A Li, Rui
%A Liu, Qi
%A Chen, Yanjiang
%A Zhang, Zheng
%A Jiang, Junzhe
%A Yu, Heng
%A Su, Yu
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F kang-etal-2025-pqr
%X Dense retrieval has now become the mainstream paradigm in information retrieval. The core idea of dense retrieval is to align document embeddings with their corresponding query embeddings by maximizing their dot product. The current training data is quite sparse, with each document typically associated with only one or a few labeled queries. However, a single document can be retrieved by multiple different queries. Aligning a document with just one or a limited number of labeled queries results in a loss of its semantic information. In this paper, we propose a training-free Potential Query Retrieval (PQR) framework to address this issue. Specifically, we use a Gaussian mixture distribution to model all potential queries for a document, aiming to capture its comprehensive semantic information. To obtain this distribution, we introduce three sampling strategies to sample a large number of potential queries for each document and encode them into a semantic space. Using these sampled queries, we employ the Expectation-Maximization algorithm to estimate parameters of the distribution. Finally, we also propose a method to calculate similarity scores between user queries and documents under the PQR framework. Extensive experiments demonstrate the effectiveness of the proposed method.
%R 10.18653/v1/2025.acl-long.660
%U https://aclanthology.org/2025.acl-long.660/
%U https://doi.org/10.18653/v1/2025.acl-long.660
%P 13455-13469
Markdown (Informal)
[PQR: Improving Dense Retrieval via Potential Query Modeling](https://aclanthology.org/2025.acl-long.660/) (Kang et al., ACL 2025)
ACL
- Junfeng Kang, Rui Li, Qi Liu, Yanjiang Chen, Zheng Zhang, Junzhe Jiang, Heng Yu, and Yu Su. 2025. PQR: Improving Dense Retrieval via Potential Query Modeling. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 13455–13469, Vienna, Austria. Association for Computational Linguistics.