@inproceedings{li-etal-2025-coevo,
title = "{C}o{E}vo: Coevolution of {LLM} and Retrieval Model for Domain-Specific Information Retrieval",
author = "Li, Ang and
Wu, Yiquan and
Hu, Yinghao and
Qing, Lizhi and
Wang, Shihang and
Liu, Chengyuan and
Wu, Tao and
Jatowt, Adam and
Cai, Ming and
Wu, Fei and
Kuang, Kun",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.757/",
pages = "14991--15010",
ISBN = "979-8-89176-332-6",
abstract = "Information retrieval in specialized domains (e.g., legal and medical) faces challenges in aligning user queries, often expressed in colloquial language, with highly structured, terminology-rich documents. This discrepancy creates a distribution gap in the text representation. Recent methods aim to enhance queries by generating intermediary elements (e.g., keywords, pseudo-documents) before performing retrieval with large language models (LLMs). However, by treating LLMs and retrievers separately, these approaches risk producing unreliable or irrelevant intermediaries, which can significantly degrade retrieval performance. To address this issue, we propose CoEvo, an alternating optimization framework that facilitates the coevolution of LLMs and retrieval models. CoEvo operates through two key steps: L-step directs the LLM in generating intermediaries by leveraging an archive of historical examples known to enhance retrieval. R-step trains the retriever using contrastive learning on the intermediaries produced by the LLM. Finally, we evaluate and flexibly leverage content generated by the LLM to amplify the effectiveness of coevolution. Experimental results demonstrate significant improvements in retrieval performance across both legal and medical domains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-coevo">
<titleInfo>
<title>CoEvo: Coevolution of LLM and Retrieval Model for Domain-Specific Information Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiquan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinghao</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lizhi</namePart>
<namePart type="family">Qing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shihang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Jatowt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kun</namePart>
<namePart type="family">Kuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Information retrieval in specialized domains (e.g., legal and medical) faces challenges in aligning user queries, often expressed in colloquial language, with highly structured, terminology-rich documents. This discrepancy creates a distribution gap in the text representation. Recent methods aim to enhance queries by generating intermediary elements (e.g., keywords, pseudo-documents) before performing retrieval with large language models (LLMs). However, by treating LLMs and retrievers separately, these approaches risk producing unreliable or irrelevant intermediaries, which can significantly degrade retrieval performance. To address this issue, we propose CoEvo, an alternating optimization framework that facilitates the coevolution of LLMs and retrieval models. CoEvo operates through two key steps: L-step directs the LLM in generating intermediaries by leveraging an archive of historical examples known to enhance retrieval. R-step trains the retriever using contrastive learning on the intermediaries produced by the LLM. Finally, we evaluate and flexibly leverage content generated by the LLM to amplify the effectiveness of coevolution. Experimental results demonstrate significant improvements in retrieval performance across both legal and medical domains.</abstract>
<identifier type="citekey">li-etal-2025-coevo</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.757/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>14991</start>
<end>15010</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CoEvo: Coevolution of LLM and Retrieval Model for Domain-Specific Information Retrieval
%A Li, Ang
%A Wu, Yiquan
%A Hu, Yinghao
%A Qing, Lizhi
%A Wang, Shihang
%A Liu, Chengyuan
%A Wu, Tao
%A Jatowt, Adam
%A Cai, Ming
%A Wu, Fei
%A Kuang, Kun
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F li-etal-2025-coevo
%X Information retrieval in specialized domains (e.g., legal and medical) faces challenges in aligning user queries, often expressed in colloquial language, with highly structured, terminology-rich documents. This discrepancy creates a distribution gap in the text representation. Recent methods aim to enhance queries by generating intermediary elements (e.g., keywords, pseudo-documents) before performing retrieval with large language models (LLMs). However, by treating LLMs and retrievers separately, these approaches risk producing unreliable or irrelevant intermediaries, which can significantly degrade retrieval performance. To address this issue, we propose CoEvo, an alternating optimization framework that facilitates the coevolution of LLMs and retrieval models. CoEvo operates through two key steps: L-step directs the LLM in generating intermediaries by leveraging an archive of historical examples known to enhance retrieval. R-step trains the retriever using contrastive learning on the intermediaries produced by the LLM. Finally, we evaluate and flexibly leverage content generated by the LLM to amplify the effectiveness of coevolution. Experimental results demonstrate significant improvements in retrieval performance across both legal and medical domains.
%U https://aclanthology.org/2025.emnlp-main.757/
%P 14991-15010
Markdown (Informal)
[CoEvo: Coevolution of LLM and Retrieval Model for Domain-Specific Information Retrieval](https://aclanthology.org/2025.emnlp-main.757/) (Li et al., EMNLP 2025)
ACL
- Ang Li, Yiquan Wu, Yinghao Hu, Lizhi Qing, Shihang Wang, Chengyuan Liu, Tao Wu, Adam Jatowt, Ming Cai, Fei Wu, and Kun Kuang. 2025. CoEvo: Coevolution of LLM and Retrieval Model for Domain-Specific Information Retrieval. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 14991–15010, Suzhou, China. Association for Computational Linguistics.