@inproceedings{zhang-etal-2025-arxsa,
title = "{ARXSA}: A General Negative Feedback Control Theory in Vision-Language Models",
author = "Zhang, Zeyu and
Chen, Tianqi and
Todo, Yuki",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.591/",
pages = "11100--11110",
ISBN = "979-8-89176-335-7",
abstract = "The Transformer model has been increasingly applied across various domains, driven by the self-attention mechanism, which offers robust data processing capabilities and has substantially contributed to the advancement of the model. In the self-attention mechanism, three core matrices from the same data batch are computed together to determine correlations between input elements. Drawing inspiration from the efficiency and stability conferred by negative feedback structures in predictive control systems, the concept of vertical training was introduced to integrate data from multiple batches. Accordingly, this paper proposes an autoregressive with exogenous inputs (ARX) approach for the self-attention mechanism, transforming the Encoder block into a negative feedback predictive control system. A network architecture based on this method is also proposed, enabling the autoregressive with exogenous inputs for self-attention to transmit data from batches at previous time points. The effectiveness of the proposed approach is validated through comparative experimental evaluations."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-arxsa">
<titleInfo>
<title>ARXSA: A General Negative Feedback Control Theory in Vision-Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zeyu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuki</namePart>
<namePart type="family">Todo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>The Transformer model has been increasingly applied across various domains, driven by the self-attention mechanism, which offers robust data processing capabilities and has substantially contributed to the advancement of the model. In the self-attention mechanism, three core matrices from the same data batch are computed together to determine correlations between input elements. Drawing inspiration from the efficiency and stability conferred by negative feedback structures in predictive control systems, the concept of vertical training was introduced to integrate data from multiple batches. Accordingly, this paper proposes an autoregressive with exogenous inputs (ARX) approach for the self-attention mechanism, transforming the Encoder block into a negative feedback predictive control system. A network architecture based on this method is also proposed, enabling the autoregressive with exogenous inputs for self-attention to transmit data from batches at previous time points. The effectiveness of the proposed approach is validated through comparative experimental evaluations.</abstract>
<identifier type="citekey">zhang-etal-2025-arxsa</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.591/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>11100</start>
<end>11110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ARXSA: A General Negative Feedback Control Theory in Vision-Language Models
%A Zhang, Zeyu
%A Chen, Tianqi
%A Todo, Yuki
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F zhang-etal-2025-arxsa
%X The Transformer model has been increasingly applied across various domains, driven by the self-attention mechanism, which offers robust data processing capabilities and has substantially contributed to the advancement of the model. In the self-attention mechanism, three core matrices from the same data batch are computed together to determine correlations between input elements. Drawing inspiration from the efficiency and stability conferred by negative feedback structures in predictive control systems, the concept of vertical training was introduced to integrate data from multiple batches. Accordingly, this paper proposes an autoregressive with exogenous inputs (ARX) approach for the self-attention mechanism, transforming the Encoder block into a negative feedback predictive control system. A network architecture based on this method is also proposed, enabling the autoregressive with exogenous inputs for self-attention to transmit data from batches at previous time points. The effectiveness of the proposed approach is validated through comparative experimental evaluations.
%U https://aclanthology.org/2025.findings-emnlp.591/
%P 11100-11110
Markdown (Informal)
[ARXSA: A General Negative Feedback Control Theory in Vision-Language Models](https://aclanthology.org/2025.findings-emnlp.591/) (Zhang et al., Findings 2025)
ACL
Zeyu Zhang, Tianqi Chen, and Yuki Todo. 2025. ARXSA: A General Negative Feedback Control Theory in Vision-Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 11100–11110, Suzhou, China. Association for Computational Linguistics.
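
For orientation on the ARX terminology used in the abstract, a minimal sketch of the standard autoregressive with exogenous inputs (ARX) difference equation from system identification follows. The mapping suggested in the comments, with y_t standing for the Encoder block output for batch t and u_t for that batch's self-attention input, is an illustrative assumption for reading the abstract, not the paper's exact formulation.

% Standard ARX form; signs of the autoregressive terms are absorbed into the a_i coefficients.
% Assumed mapping (illustrative only): y_t ~ Encoder block output for batch t,
% u_t ~ self-attention input from batch t, e_t ~ residual/noise term,
% n_a, n_b ~ number of past batches fed back and fed forward, respectively.
\[
  y_t = \sum_{i=1}^{n_a} a_i\, y_{t-i} + \sum_{j=1}^{n_b} b_j\, u_{t-j} + e_t
\]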