% BibTeX record for the paper published at https://aclanthology.org/2026.eacl-srw.22/
@inproceedings{varangot-reille-etal-2026-generalising,
    title     = {Generalising {LLM} Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient},
    author    = {Varangot-Reille, Clovis and
                 Bouvard, Christophe and
                 Gourru, Antoine},
    editor    = {Baez Santamaria, Selene and
                 Somayajula, Sai Ashish and
                 Yamaguchi, Atsuki},
    booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.eacl-srw.22/},
    pages     = {304--319},
    isbn      = {979-8-89176-383-8},
    abstract  = {We study model routing for Large Language Model (LLM)-based systems. A model, called the router, dynamically chooses which LLM should handle a given input/query. We challenge the assumption that complex routers are necessary for generalising to new candidate LLMs. We introduce ContextualRouter, a simple meta-evaluation framework that predicts per-model performance for new queries by retrieving similar past queries and reweighting model scores with lightweight attention. During inference, the router balances estimated performance and cost by adjusting a tunable cost penalty parameter. This allows the router to adapt dynamically to the addition or removal of LLMs without the need for retraining. Across five routing benchmarks (SPROUT, RouterBench, LiveBench, BigGenBench, and EmbedLLM), ContextualRouter matches the quality{--}cost trade-offs of other generalisable routers. Surprisingly, a simpler non-parametric baseline, $k$-nearest-neighbour averaging, performs comparably or better, achieving strong performance estimation, high NDCG, and substantial cost savings. Retrieval-based routers remain robust to $k$, embedding size, data sparsity, retrieval degradation, and generalise to unseen queries and models with as little as 1{\%} historical data. These results suggest that effective retrieval alone enables generalisable LLM routing.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="varangot-reille-etal-2026-generalising">
    <!-- The paper itself: title, authors and issue date. -->
    <titleInfo>
      <title>Generalising LLM Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Clovis</namePart>
      <namePart type="family">Varangot-Reille</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christophe</namePart>
      <namePart type="family">Bouvard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antoine</namePart>
      <namePart type="family">Gourru</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Selene</namePart>
        <namePart type="family">Baez Santamaria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sai</namePart>
        <namePart type="given">Ashish</namePart>
        <namePart type="family">Somayajula</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atsuki</namePart>
        <namePart type="family">Yamaguchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-383-8</identifier>
    </relatedItem>
    <abstract>We study model routing for Large Language Model (LLM)-based systems. A model, called the router, dynamically chooses which LLM should handle a given input/query. We challenge the assumption that complex routers are necessary for generalising to new candidate LLMs. We introduce ContextualRouter, a simple meta-evaluation framework that predicts per-model performance for new queries by retrieving similar past queries and reweighting model scores with lightweight attention. During inference, the router balances estimated performance and cost by adjusting a tunable cost penalty parameter. This allows the router to adapt dynamically to the addition or removal of LLMs without the need for retraining. Across five routing benchmarks (SPROUT, RouterBench, LiveBench, BigGenBench, and EmbedLLM), ContextualRouter matches the quality–cost trade-offs of other generalisable routers. Surprisingly, a simpler non-parametric baseline, k-nearest-neighbour averaging, performs comparably or better, achieving strong performance estimation, high NDCG, and substantial cost savings. Retrieval-based routers remain robust to k, embedding size, data sparsity, retrieval degradation, and generalise to unseen queries and models with as little as 1% historical data. These results suggest that effective retrieval alone enables generalisable LLM routing.</abstract>
    <!-- Citation key, landing page, and page range within the host volume. -->
    <identifier type="citekey">varangot-reille-etal-2026-generalising</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-srw.22/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>304</start>
        <end>319</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Generalising LLM Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient
%A Varangot-Reille, Clovis
%A Bouvard, Christophe
%A Gourru, Antoine
%Y Baez Santamaria, Selene
%Y Somayajula, Sai Ashish
%Y Yamaguchi, Atsuki
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-383-8
%F varangot-reille-etal-2026-generalising
%X We study model routing for Large Language Model (LLM)-based systems. A model, called the router, dynamically chooses which LLM should handle a given input/query. We challenge the assumption that complex routers are necessary for generalising to new candidate LLMs. We introduce ContextualRouter, a simple meta-evaluation framework that predicts per-model performance for new queries by retrieving similar past queries and reweighting model scores with lightweight attention. During inference, the router balances estimated performance and cost by adjusting a tunable cost penalty parameter. This allows the router to adapt dynamically to the addition or removal of LLMs without the need for retraining. Across five routing benchmarks (SPROUT, RouterBench, LiveBench, BigGenBench, and EmbedLLM), ContextualRouter matches the quality–cost trade-offs of other generalisable routers. Surprisingly, a simpler non-parametric baseline, k-nearest-neighbour averaging, performs comparably or better, achieving strong performance estimation, high NDCG, and substantial cost savings. Retrieval-based routers remain robust to k, embedding size, data sparsity, retrieval degradation, and generalise to unseen queries and models with as little as 1% historical data. These results suggest that effective retrieval alone enables generalisable LLM routing.
%U https://aclanthology.org/2026.eacl-srw.22/
%P 304-319
Markdown (Informal)
[Generalising LLM Routing using Past Performance Retrieval: A Few-Shot Router is Sufficient](https://aclanthology.org/2026.eacl-srw.22/) (Varangot-Reille et al., EACL 2026)
ACL