@inproceedings{lu-etal-2025-multiconir,
title = "{M}ulti{C}on{IR}: Towards Multi-Condition Information Retrieval",
author = "Lu, Xuan and
Liu, Sifan and
Yin, Bochao and
Li, Yongqi and
Chen, Xinghao and
Su, Hui and
Jin, Yaohui and
Zeng, Wenjun and
Shen, Xiaoyu",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.726/",
doi = "10.18653/v1/2025.findings-emnlp.726",
pages = "13471--13494",
ISBN = "979-8-89176-335-7",
abstract = "Multi-condition information retrieval (IR) presents a significant, yet underexplored challenge for existing systems. This paper introduces MultiConIR, the first benchmark specifically designed to evaluate retrieval and reranking models under nuanced multi-condition query scenarios across five diverse domains. We systematically assess model capabilities through three critical tasks: complexity robustness, relevance monotonicity, and query format sensitivity. Our extensive experiments on 15 models reveal a critical vulnerability: most retrievers and rerankers exhibit severe performance degradation as query complexity increases. Key deficiencies include widespread failure to maintain relevance monotonicity, and high sensitivity to query style and condition placement. The superior performance GPT-4o reveals the performance gap between IR systems and advanced LLM for handling sophisticated natural language queries. Furthermore, this work delves into the factors contributing to reranker performance deterioration and examines how condition positioning within queries affects similarity assessment, providing crucial insights for advancing IR systems towards complex search scenarios."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lu-etal-2025-multiconir">
<titleInfo>
<title>MultiConIR: Towards Multi-Condition Information Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xuan</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sifan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bochao</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongqi</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinghao</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hui</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaohui</namePart>
<namePart type="family">Jin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjun</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyu</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Multi-condition information retrieval (IR) presents a significant, yet underexplored challenge for existing systems. This paper introduces MultiConIR, the first benchmark specifically designed to evaluate retrieval and reranking models under nuanced multi-condition query scenarios across five diverse domains. We systematically assess model capabilities through three critical tasks: complexity robustness, relevance monotonicity, and query format sensitivity. Our extensive experiments on 15 models reveal a critical vulnerability: most retrievers and rerankers exhibit severe performance degradation as query complexity increases. Key deficiencies include widespread failure to maintain relevance monotonicity, and high sensitivity to query style and condition placement. The superior performance GPT-4o reveals the performance gap between IR systems and advanced LLM for handling sophisticated natural language queries. Furthermore, this work delves into the factors contributing to reranker performance deterioration and examines how condition positioning within queries affects similarity assessment, providing crucial insights for advancing IR systems towards complex search scenarios.</abstract>
<identifier type="citekey">lu-etal-2025-multiconir</identifier>
<identifier type="doi">10.18653/v1/2025.findings-emnlp.726</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.726/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>13471</start>
<end>13494</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MultiConIR: Towards Multi-Condition Information Retrieval
%A Lu, Xuan
%A Liu, Sifan
%A Yin, Bochao
%A Li, Yongqi
%A Chen, Xinghao
%A Su, Hui
%A Jin, Yaohui
%A Zeng, Wenjun
%A Shen, Xiaoyu
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F lu-etal-2025-multiconir
%X Multi-condition information retrieval (IR) presents a significant, yet underexplored challenge for existing systems. This paper introduces MultiConIR, the first benchmark specifically designed to evaluate retrieval and reranking models under nuanced multi-condition query scenarios across five diverse domains. We systematically assess model capabilities through three critical tasks: complexity robustness, relevance monotonicity, and query format sensitivity. Our extensive experiments on 15 models reveal a critical vulnerability: most retrievers and rerankers exhibit severe performance degradation as query complexity increases. Key deficiencies include widespread failure to maintain relevance monotonicity, and high sensitivity to query style and condition placement. The superior performance GPT-4o reveals the performance gap between IR systems and advanced LLM for handling sophisticated natural language queries. Furthermore, this work delves into the factors contributing to reranker performance deterioration and examines how condition positioning within queries affects similarity assessment, providing crucial insights for advancing IR systems towards complex search scenarios.
%R 10.18653/v1/2025.findings-emnlp.726
%U https://aclanthology.org/2025.findings-emnlp.726/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.726
%P 13471-13494
Markdown (Informal)
[MultiConIR: Towards Multi-Condition Information Retrieval](https://aclanthology.org/2025.findings-emnlp.726/) (Lu et al., Findings 2025)
ACL
- Xuan Lu, Sifan Liu, Bochao Yin, Yongqi Li, Xinghao Chen, Hui Su, Yaohui Jin, Wenjun Zeng, and Xiaoyu Shen. 2025. MultiConIR: Towards Multi-Condition Information Retrieval. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 13471–13494, Suzhou, China. Association for Computational Linguistics.