@inproceedings{malviya-etal-2025-mst,
title = "{MST}-{R}: Multi-Stage Tuning for Retrieval Systems and Metric Evaluation",
author = "Malviya, Yash and
Dhingra, Karan and
Singh, Maneesh",
editor = "Gokhan, Tuba and
Wang, Kexin and
Gurevych, Iryna and
Briscoe, Ted",
booktitle = "Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.regnlp-1.7/",
pages = "41--51",
abstract = "Regulatory documents are rich in nuanced terminology and specialized semantics. FRAG systems: Frozen retrieval-augmented generators utilizing pre-trained (or, frozen) components face consequent challenges with both retriever and answering performance. We present a system that adapts the retriever performance to the target domain using a multi-stage tuning (MST) strategy. Our retrieval approach, called MST-R (a) first fine-tunes encoders used in vector stores using hard negative mining, (b) then uses a hybrid retriever, combining sparse and dense retrievers using reciprocal rank fusion, and then (c) adapts the cross-attention encoder by fine-tuning only the top-k retrieved results. We benchmark the system performance on the dataset released for the RIRAG challenge (as part of the RegNLP workshop at COLING 2025). We achieve significant performance gains obtaining a top rank on the RegNLP challenge leaderboard. We also show that a trivial answering approach *games* the RePASs metric outscoring all baselines and a pre-trained Llama model. Analyzing this anomaly, we present important takeaways for future research. We also release our [code base](https://github.com/Indic-aiDias/MST-R)"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="malviya-etal-2025-mst">
<titleInfo>
<title>MST-R: Multi-Stage Tuning for Retrieval Systems and Metric Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yash</namePart>
<namePart type="family">Malviya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karan</namePart>
<namePart type="family">Dhingra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maneesh</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tuba</namePart>
<namePart type="family">Gokhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kexin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ted</namePart>
<namePart type="family">Briscoe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Regulatory documents are rich in nuanced terminology and specialized semantics. FRAG systems: Frozen retrieval-augmented generators utilizing pre-trained (or, frozen) components face consequent challenges with both retriever and answering performance. We present a system that adapts the retriever performance to the target domain using a multi-stage tuning (MST) strategy. Our retrieval approach, called MST-R (a) first fine-tunes encoders used in vector stores using hard negative mining, (b) then uses a hybrid retriever, combining sparse and dense retrievers using reciprocal rank fusion, and then (c) adapts the cross-attention encoder by fine-tuning only the top-k retrieved results. We benchmark the system performance on the dataset released for the RIRAG challenge (as part of the RegNLP workshop at COLING 2025). We achieve significant performance gains obtaining a top rank on the RegNLP challenge leaderboard. We also show that a trivial answering approach *games* the RePASs metric outscoring all baselines and a pre-trained Llama model. Analyzing this anomaly, we present important takeaways for future research. We also release our [code base](https://github.com/Indic-aiDias/MST-R)</abstract>
<identifier type="citekey">malviya-etal-2025-mst</identifier>
<location>
<url>https://aclanthology.org/2025.regnlp-1.7/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>41</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MST-R: Multi-Stage Tuning for Retrieval Systems and Metric Evaluation
%A Malviya, Yash
%A Dhingra, Karan
%A Singh, Maneesh
%Y Gokhan, Tuba
%Y Wang, Kexin
%Y Gurevych, Iryna
%Y Briscoe, Ted
%S Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F malviya-etal-2025-mst
%X Regulatory documents are rich in nuanced terminology and specialized semantics. FRAG systems: Frozen retrieval-augmented generators utilizing pre-trained (or, frozen) components face consequent challenges with both retriever and answering performance. We present a system that adapts the retriever performance to the target domain using a multi-stage tuning (MST) strategy. Our retrieval approach, called MST-R (a) first fine-tunes encoders used in vector stores using hard negative mining, (b) then uses a hybrid retriever, combining sparse and dense retrievers using reciprocal rank fusion, and then (c) adapts the cross-attention encoder by fine-tuning only the top-k retrieved results. We benchmark the system performance on the dataset released for the RIRAG challenge (as part of the RegNLP workshop at COLING 2025). We achieve significant performance gains obtaining a top rank on the RegNLP challenge leaderboard. We also show that a trivial answering approach *games* the RePASs metric outscoring all baselines and a pre-trained Llama model. Analyzing this anomaly, we present important takeaways for future research. We also release our [code base](https://github.com/Indic-aiDias/MST-R)
%U https://aclanthology.org/2025.regnlp-1.7/
%P 41-51
Markdown (Informal)
[MST-R: Multi-Stage Tuning for Retrieval Systems and Metric Evaluation](https://aclanthology.org/2025.regnlp-1.7/) (Malviya et al., RegNLP 2025)
ACL