@inproceedings{liu-etal-2026-stop,
title = "Stop Hardening Everything: A Training-Free Neuron-Level Defense for Neural Ranking Models",
author = "Liu, Yu-An and
Zhang, Ruqing and
Song, Hongru and
Guo, Jiafeng and
Fan, Yixing and
Cheng, Xueqi",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.1076/",
pages = "23474--23484",
ISBN = "979-8-89176-390-6",
abstract = "While neural ranking models (NRMs) have achieved state-of-the-art performance in information retrieval, they remain highly vulnerable to imperceptible adversarial perturbations. Existing defenses are predominantly data-centric, exemplified by adversarial training, which requires constructing large collections of adversarial examples. By treating NRMs as black boxes and indiscriminately optimizing all model parameters, these methods incur substantial computational cost and often degrade performance on clean data due to overfitting. In this paper, we advocate that adversarial vulnerability is not uniformly distributed across model parameters, but instead originates from specific internal units. We propose a paradigm shift toward a model-centric defense that addresses vulnerability at its architectural source, without requiring costly retraining or adversarial data generation. Specifically, we introduce Search in the Model, a novel training-free framework that performs fine-grained identification and rectification of vulnerable neurons directly within the model. By formulating neuron identification as a ranking problem, we develop a maximum marginal vulnerability criterion to precisely locate the top-$K$ neurons most responsible for model vulnerability, and apply targeted neuronal inverse perturbation to correct them. Extensive experiments on MS MARCO and TREC 19 show that our approach outperforms state-of-the-art baselines in both defense efficiency and robustness to seen and unseen attacks, while preserving strong performance on clean data."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2026-stop">
<titleInfo>
<title>Stop Hardening Everything: A Training-Free Neuron-Level Defense for Neural Ranking Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu-An</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruqing</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongru</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiafeng</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixing</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xueqi</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>While neural ranking models (NRMs) have achieved state-of-the-art performance in information retrieval, they remain highly vulnerable to imperceptible adversarial perturbations. Existing defenses are predominantly data-centric, exemplified by adversarial training, which requires constructing large collections of adversarial examples. By treating NRMs as black boxes and indiscriminately optimizing all model parameters, these methods incur substantial computational cost and often degrade performance on clean data due to overfitting. In this paper, we advocate that adversarial vulnerability is not uniformly distributed across model parameters, but instead originates from specific internal units. We propose a paradigm shift toward a model-centric defense that addresses vulnerability at its architectural source, without requiring costly retraining or adversarial data generation. Specifically, we introduce Search in the Model, a novel training-free framework that performs fine-grained identification and rectification of vulnerable neurons directly within the model. By formulating neuron identification as a ranking problem, we develop a maximum marginal vulnerability criterion to precisely locate the top-K neurons most responsible for model vulnerability, and apply targeted neuronal inverse perturbation to correct them. Extensive experiments on MS MARCO and TREC 19 show that our approach outperforms state-of-the-art baselines in both defense efficiency and robustness to seen and unseen attacks, while preserving strong performance on clean data.</abstract>
<identifier type="citekey">liu-etal-2026-stop</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1076/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>23474</start>
<end>23484</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Stop Hardening Everything: A Training-Free Neuron-Level Defense for Neural Ranking Models
%A Liu, Yu-An
%A Zhang, Ruqing
%A Song, Hongru
%A Guo, Jiafeng
%A Fan, Yixing
%A Cheng, Xueqi
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F liu-etal-2026-stop
%X While neural ranking models (NRMs) have achieved state-of-the-art performance in information retrieval, they remain highly vulnerable to imperceptible adversarial perturbations. Existing defenses are predominantly data-centric, exemplified by adversarial training, which requires constructing large collections of adversarial examples. By treating NRMs as black boxes and indiscriminately optimizing all model parameters, these methods incur substantial computational cost and often degrade performance on clean data due to overfitting. In this paper, we advocate that adversarial vulnerability is not uniformly distributed across model parameters, but instead originates from specific internal units. We propose a paradigm shift toward a model-centric defense that addresses vulnerability at its architectural source, without requiring costly retraining or adversarial data generation. Specifically, we introduce Search in the Model, a novel training-free framework that performs fine-grained identification and rectification of vulnerable neurons directly within the model. By formulating neuron identification as a ranking problem, we develop a maximum marginal vulnerability criterion to precisely locate the top-K neurons most responsible for model vulnerability, and apply targeted neuronal inverse perturbation to correct them. Extensive experiments on MS MARCO and TREC 19 show that our approach outperforms state-of-the-art baselines in both defense efficiency and robustness to seen and unseen attacks, while preserving strong performance on clean data.
%U https://aclanthology.org/2026.acl-long.1076/
%P 23474-23484
Markdown (Informal)
[Stop Hardening Everything: A Training-Free Neuron-Level Defense for Neural Ranking Models](https://aclanthology.org/2026.acl-long.1076/) (Liu et al., ACL 2026)
ACL