@inproceedings{xie-etal-2025-language,
title = "Can Language Neuron Intervention Reduce Non-Target Language Output?",
author = "Xie, Suchun and
Kim, Hwichan and
Sasaki, Shota and
Yamada, Kosuke and
Suzuki, Jun",
editor = "Belinkov, Yonatan and
Mueller, Aaron and
Kim, Najoung and
Mohebbi, Hosein and
Chen, Hanjie and
Arad, Dana and
Sarti, Gabriele",
booktitle = "Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.blackboxnlp-1.26/",
pages = "452--466",
ISBN = "979-8-89176-346-3",
abstract = "Large language models (LLMs) often fail togenerate text in the intended target language,particularly in non-English interactions. Con-currently, recent work has explored LanguageNeuron Intervention (LNI) as a promising tech-nique for steering output language. In thispaper, we re-evaluate LNI in more practicalscenarios{---}specifically with instruction-tunedmodels and prompts that explicitly specify thetarget language. Our experiments show thatwhile LNI also shows potential in such practi-cal scenarios, its average effect is limited andunstable across models and tasks, with a 0.83{\%}reduction in undesired language output and a0.1{\%} improvement in performance. Our furtheranalysis identifies two key factors for LNI{'}slimitation: (1) LNI affects both the output lan-guage and the content semantics, making ithard to control one without affecting the other,which explains the weak performance gains. (2)LNI increases the target language token proba-bilities, but they often remain below the top-1generation threshold, resulting in failure to gen-erate the target language in most cases. Ourresults highlight both the potential and limi-tations of LNI, paving the way for future im-provements"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xie-etal-2025-language">
<titleInfo>
<title>Can Language Neuron Intervention Reduce Non-Target Language Output?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Suchun</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hwichan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shota</namePart>
<namePart type="family">Sasaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kosuke</namePart>
<namePart type="family">Yamada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Mueller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Najoung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Mohebbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanjie</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Arad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriele</namePart>
<namePart type="family">Sarti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-346-3</identifier>
</relatedItem>
<abstract>Large language models (LLMs) often fail to generate text in the intended target language, particularly in non-English interactions. Concurrently, recent work has explored Language Neuron Intervention (LNI) as a promising technique for steering output language. In this paper, we re-evaluate LNI in more practical scenarios—specifically with instruction-tuned models and prompts that explicitly specify the target language. Our experiments show that while LNI also shows potential in such practical scenarios, its average effect is limited and unstable across models and tasks, with a 0.83% reduction in undesired language output and a 0.1% improvement in performance. Our further analysis identifies two key factors for LNI’s limitation: (1) LNI affects both the output language and the content semantics, making it hard to control one without affecting the other, which explains the weak performance gains. (2) LNI increases the target language token probabilities, but they often remain below the top-1 generation threshold, resulting in failure to generate the target language in most cases. Our results highlight both the potential and limitations of LNI, paving the way for future improvements.</abstract>
<identifier type="citekey">xie-etal-2025-language</identifier>
<location>
<url>https://aclanthology.org/2025.blackboxnlp-1.26/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>452</start>
<end>466</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Language Neuron Intervention Reduce Non-Target Language Output?
%A Xie, Suchun
%A Kim, Hwichan
%A Sasaki, Shota
%A Yamada, Kosuke
%A Suzuki, Jun
%Y Belinkov, Yonatan
%Y Mueller, Aaron
%Y Kim, Najoung
%Y Mohebbi, Hosein
%Y Chen, Hanjie
%Y Arad, Dana
%Y Sarti, Gabriele
%S Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-346-3
%F xie-etal-2025-language
%X Large language models (LLMs) often fail to generate text in the intended target language, particularly in non-English interactions. Concurrently, recent work has explored Language Neuron Intervention (LNI) as a promising technique for steering output language. In this paper, we re-evaluate LNI in more practical scenarios—specifically with instruction-tuned models and prompts that explicitly specify the target language. Our experiments show that while LNI also shows potential in such practical scenarios, its average effect is limited and unstable across models and tasks, with a 0.83% reduction in undesired language output and a 0.1% improvement in performance. Our further analysis identifies two key factors for LNI’s limitation: (1) LNI affects both the output language and the content semantics, making it hard to control one without affecting the other, which explains the weak performance gains. (2) LNI increases the target language token probabilities, but they often remain below the top-1 generation threshold, resulting in failure to generate the target language in most cases. Our results highlight both the potential and limitations of LNI, paving the way for future improvements.
%U https://aclanthology.org/2025.blackboxnlp-1.26/
%P 452-466
Markdown (Informal)
[Can Language Neuron Intervention Reduce Non-Target Language Output?](https://aclanthology.org/2025.blackboxnlp-1.26/) (Xie et al., BlackboxNLP 2025)