% Conference-paper record for "Ada-RS: Adaptive Rejection Sampling for
% Selective Thinking" (Ge et al., 2026). Text outside an entry is ignored by
% BibTeX, so these lines act as comments.
% Notes for maintainers:
%   - Capitals that must survive style recasing are brace-protected in
%     title and booktitle.
%   - month uses the predefined macro (unquoted) so styles can localise it.
%   - The abstract keeps LaTeX markup (\ensuremath, \%) because this field is
%     typeset by LaTeX; the benchmark name is written as tau-squared.
@inproceedings{ge-etal-2026-ada,
    title = "{A}da-{RS}: Adaptive Rejection Sampling for Selective Thinking",
    author = "Ge, Yirou and
      Li, Yixi and
      Chiu, Alec M. and
      Shekhar, Shivani and
      Pan, Zijie and
      Thangali, Avinash and
      Chuang, Yun-Shiuan and
      Kulkarni, Chaitanya and
      Kona, Uma and
      Pang, Linsey and
      Mehrotra, Prakhar",
    editor = "Li, Yunyao and
      Rehm, Georg and
      Tu, Mei",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
    month = jul,
    year = "2026",
    address = "San Diego, California, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-industry.88/",
    pages = "1265--1276",
    isbn = "979-8-89176-394-4",
    abstract = "Large language models (LLMs) are increasingly being deployed in cost- and latency-sensitive settings. While chain-of-thought improves reasoning, it can waste tokens on simple requests. We study selective thinking for tool-using LLMs and introduce Adaptive Rejection Sampling (Ada-RS), an algorithm-agnostic sample filtering framework for learning selective and efficient reasoning. For each given context, Ada-RS scores multiple sampled completions with an adaptive length-penalized reward then applies stochastic rejection sampling to retain only high-reward candidates (or preference pairs) for downstream optimization. We demonstrate how Ada-RS plugs into both preference pair (e.g. DPO) or grouped policy optimization strategies (e.g. DAPO). Using Qwen3-8B with LoRA on a synthetic tool call-oriented e-commerce benchmark, Ada-RS improves the accuracy-efficiency frontier over standard algorithms by reducing average output tokens by up to {\ensuremath{\sim}}80{\%} and reducing thinking rate by up to {\ensuremath{\sim}}95{\%} while maintaining or improving tool call accuracy. We further demonstrate that these gains generalize across model scales (Qwen3-1.7B, 8B, 14B) and domains ({\ensuremath{\tau^2}}-Bench airline and telecom). These results highlight that training signal selection is a powerful lever for efficient reasoning in latency-sensitive deployments."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ge-etal-2026-ada">
<titleInfo>
<title>Ada-RS: Adaptive Rejection Sampling for Selective Thinking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yirou</namePart>
<namePart type="family">Ge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixi</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alec</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Chiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivani</namePart>
<namePart type="family">Shekhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zijie</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avinash</namePart>
<namePart type="family">Thangali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Shiuan</namePart>
<namePart type="family">Chuang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chaitanya</namePart>
<namePart type="family">Kulkarni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Uma</namePart>
<namePart type="family">Kona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linsey</namePart>
<namePart type="family">Pang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prakhar</namePart>
<namePart type="family">Mehrotra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-394-4</identifier>
</relatedItem>
<abstract>Large language models (LLMs) are increasingly being deployed in cost- and latency-sensitive settings. While chain-of-thought improves reasoning, it can waste tokens on simple requests. We study selective thinking for tool-using LLMs and introduce Adaptive Rejection Sampling (Ada-RS), an algorithm-agnostic sample filtering framework for learning selective and efficient reasoning. For each given context, Ada-RS scores multiple sampled completions with an adaptive length-penalized reward then applies stochastic rejection sampling to retain only high-reward candidates (or preference pairs) for downstream optimization. We demonstrate how Ada-RS plugs into both preference pair (e.g. DPO) or grouped policy optimization strategies (e.g. DAPO). Using Qwen3-8B with LoRA on a synthetic tool call-oriented e-commerce benchmark, Ada-RS improves the accuracy-efficiency frontier over standard algorithms by reducing average output tokens by up to ~80% and reducing thinking rate by up to ~95% while maintaining or improving tool call accuracy. We further demonstrate that these gains generalize across model scales (Qwen3-1.7B, 8B, 14B) and domains (τ²-Bench airline and telecom). These results highlight that training signal selection is a powerful lever for efficient reasoning in latency-sensitive deployments.</abstract>
<identifier type="citekey">ge-etal-2026-ada</identifier>
<location>
<url>https://aclanthology.org/2026.acl-industry.88/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1265</start>
<end>1276</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Ada-RS: Adaptive Rejection Sampling for Selective Thinking
%A Ge, Yirou
%A Li, Yixi
%A Chiu, Alec M.
%A Shekhar, Shivani
%A Pan, Zijie
%A Thangali, Avinash
%A Chuang, Yun-Shiuan
%A Kulkarni, Chaitanya
%A Kona, Uma
%A Pang, Linsey
%A Mehrotra, Prakhar
%Y Li, Yunyao
%Y Rehm, Georg
%Y Tu, Mei
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-394-4
%F ge-etal-2026-ada
%X Large language models (LLMs) are increasingly being deployed in cost- and latency-sensitive settings. While chain-of-thought improves reasoning, it can waste tokens on simple requests. We study selective thinking for tool-using LLMs and introduce Adaptive Rejection Sampling (Ada-RS), an algorithm-agnostic sample filtering framework for learning selective and efficient reasoning. For each given context, Ada-RS scores multiple sampled completions with an adaptive length-penalized reward then applies stochastic rejection sampling to retain only high-reward candidates (or preference pairs) for downstream optimization. We demonstrate how Ada-RS plugs into both preference pair (e.g. DPO) or grouped policy optimization strategies (e.g. DAPO). Using Qwen3-8B with LoRA on a synthetic tool call-oriented e-commerce benchmark, Ada-RS improves the accuracy-efficiency frontier over standard algorithms by reducing average output tokens by up to ~80% and reducing thinking rate by up to ~95% while maintaining or improving tool call accuracy. We further demonstrate that these gains generalize across model scales (Qwen3-1.7B, 8B, 14B) and domains (τ²-Bench airline and telecom). These results highlight that training signal selection is a powerful lever for efficient reasoning in latency-sensitive deployments.
%U https://aclanthology.org/2026.acl-industry.88/
%P 1265-1276
Markdown (Informal)
[Ada-RS: Adaptive Rejection Sampling for Selective Thinking](https://aclanthology.org/2026.acl-industry.88/) (Ge et al., ACL 2026)
ACL
- Yirou Ge, Yixi Li, Alec M. Chiu, Shivani Shekhar, Zijie Pan, Avinash Thangali, Yun-Shiuan Chuang, Chaitanya Kulkarni, Uma Kona, Linsey Pang, and Prakhar Mehrotra. 2026. Ada-RS: Adaptive Rejection Sampling for Selective Thinking. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026), pages 1265–1276, San Diego, California, USA. Association for Computational Linguistics.