@inproceedings{nguyen-etal-2025-structured,
title = "Structured Pruning for Diverse Best-of-$N$ Reasoning Optimization",
author = "Nguyen, Hieu Trung and
Nguyen, Bao and
Nguyen, Viet Anh",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1225/",
doi = "10.18653/v1/2025.findings-acl.1225",
pages = "23911--23922",
ISBN = "979-8-89176-256-5",
abstract = "Model pruning in transformer-based language models, traditionally seen as a means of computational savings, can enhance the model{'}s reasoning capabilities. In this work, we uncover the surprising phenomenon that the selective pruning of certain attention heads leads to improvements in reasoning performance, particularly on challenging tasks. Motivated by this observation, we propose SPRINT, a novel contrastive learning framework that dynamically selects the optimal head and layer to prune during inference. By aligning question embeddings with head embeddings, our approach identifies those pruned-head configurations that result in more accurate reasoning. Extensive experiments on the MATH dataset demonstrate that our method significantly outperforms traditional best-of-$N$ and random head selection strategies on the MATH500 and GSM8K datasets."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nguyen-etal-2025-structured">
    <titleInfo>
      <title>Structured Pruning for Diverse Best-of-N Reasoning Optimization</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Hieu</namePart>
      <namePart type="given">Trung</namePart>
      <namePart type="family">Nguyen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bao</namePart>
      <namePart type="family">Nguyen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Viet</namePart>
      <namePart type="given">Anh</namePart>
      <namePart type="family">Nguyen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>
    <abstract>Model pruning in transformer-based language models, traditionally seen as a means of computational savings, can enhance the model’s reasoning capabilities. In this work, we uncover the surprising phenomenon that the selective pruning of certain attention heads leads to improvements in reasoning performance, particularly on challenging tasks. Motivated by this observation, we propose SPRINT, a novel contrastive learning framework that dynamically selects the optimal head and layer to prune during inference. By aligning question embeddings with head embeddings, our approach identifies those pruned-head configurations that result in more accurate reasoning. Extensive experiments on the MATH dataset demonstrate that our method significantly outperforms traditional best-of-N and random head selection strategies on the MATH500 and GSM8K datasets.</abstract>
    <identifier type="citekey">nguyen-etal-2025-structured</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.1225</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.1225/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>23911</start>
        <end>23922</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Structured Pruning for Diverse Best-of-N Reasoning Optimization
%A Nguyen, Hieu Trung
%A Nguyen, Bao
%A Nguyen, Viet Anh
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F nguyen-etal-2025-structured
%X Model pruning in transformer-based language models, traditionally seen as a means of computational savings, can enhance the model’s reasoning capabilities. In this work, we uncover the surprising phenomenon that the selective pruning of certain attention heads leads to improvements in reasoning performance, particularly on challenging tasks. Motivated by this observation, we propose SPRINT, a novel contrastive learning framework that dynamically selects the optimal head and layer to prune during inference. By aligning question embeddings with head embeddings, our approach identifies those pruned-head configurations that result in more accurate reasoning. Extensive experiments on the MATH dataset demonstrate that our method significantly outperforms traditional best-of-N and random head selection strategies on the MATH500 and GSM8K datasets.
%R 10.18653/v1/2025.findings-acl.1225
%U https://aclanthology.org/2025.findings-acl.1225/
%U https://doi.org/10.18653/v1/2025.findings-acl.1225
%P 23911-23922
Markdown (Informal)
[Structured Pruning for Diverse Best-of-N Reasoning Optimization](https://aclanthology.org/2025.findings-acl.1225/) (Nguyen et al., Findings 2025)
ACL
Hieu Trung Nguyen, Bao Nguyen, and Viet Anh Nguyen. 2025. Structured Pruning for Diverse Best-of-N Reasoning Optimization. In Findings of the Association for Computational Linguistics: ACL 2025, pages 23911–23922, Vienna, Austria. Association for Computational Linguistics.