% ACL Anthology record: Guo et al., ACL 2026 (Volume 1: Long Papers), pages 22104--22121.
% Words whose capitalisation must survive sentence-casing styles (the proper noun
% "Matthew", the acronym "LVLM", and the association name in the booktitle) are
% brace-protected as whole words. The abstract uses LaTeX dashes and quotes.
@inproceedings{guo-etal-2026-counteracting,
    title     = {Counteracting the {Matthew} Effect in Self-Improvement of {LVLM}s through Head-Tail Re-balancing},
    author    = {Guo, Xin and
                 Xi, Zhiheng and
                 Ding, Yiwen and
                 Zhai, Yitao and
                 Shi, Xiaowei and
                 Cai, Xunliang and
                 Gui, Tao and
                 Zhang, Qi and
                 Huang, Xuanjing},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.acl-long.1010/},
    pages     = {22104--22121},
    isbn      = {979-8-89176-390-6},
    abstract  = {Self-improvement has emerged as a mainstream paradigm for advancing the reasoning capabilities of large vision{--}language models (LVLMs), where models explore and learn from successful trajectories iteratively. However, we identify a critical imbalance during this process: the model readily generates high-quality trajectories for simple queries (i.e., head data) but struggles with complex ones (i.e., tail data). This bias drives the optimization to disproportionately prioritize simple reasoning skills, while inhibiting the acquisition of complex capabilities. As iterations progress, this imbalance becomes more acute{---}a dynamic we term the ``Matthew effect'', ultimately stalling performance gains. To mitigate this, we approach head-tail re-balance during the exploration-and-learning process from two perspectives: distribution-reshaping and trajectory-resampling. Extensive experiments on Qwen2-VL-7B-Instruct and InternVL2.5-4B models across visual reasoning tasks demonstrate that our methods consistently improve visual reasoning capabilities, outperforming vanilla self-improvement baselines by an average of 3.86 points.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 collection containing a single bibliographic record. -->
<mods ID="guo-etal-2026-counteracting">
<!-- The record ID above is the same string as the citekey identifier near the end of this record. -->
<titleInfo>
<title>Counteracting the Matthew Effect in Self-Improvement of LVLMs through Head-Tail Re-balancing</title>
</titleInfo>
<!-- Authors: nine personal names, each split into given/family parts and tagged with the "author" role. -->
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiheng</namePart>
<namePart type="family">Xi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yiwen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yitao</namePart>
<namePart type="family">Zhai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaowei</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xunliang</namePart>
<namePart type="family">Cai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Gui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given to year-month precision. -->
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher, place, genre and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<!-- NOTE(review): the middle initial is stored here without a period, while the BibTeX and EndNote records in this file give "Viviane P."; confirm whether this is intentional. -->
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<!-- Abstract as plain Unicode text (en dash, em dash and curly quotes are literal characters here). -->
<abstract>Self-improvement has emerged as a mainstream paradigm for advancing the reasoning capabilities of large vision–language models (LVLMs), where models explore and learn from successful trajectories iteratively. However, we identify a critical imbalance during this process: the model readily generates high-quality trajectories for simple queries (i.e., head data) but struggles with complex ones (i.e., tail data). This bias drives the optimization to disproportionately prioritize simple reasoning skills, while inhibiting the acquisition of complex capabilities. As iterations progress, this imbalance becomes more acute—a dynamic we term the “Matthew effect”, ultimately stalling performance gains. To mitigate this, we approach head-tail re-balance during the exploration-and-learning process from two perspectives: distribution-reshaping and trajectory-resampling. Extensive experiments on Qwen2-VL-7B-Instruct and InternVL2.5-4B models across visual reasoning tasks demonstrate that our methods consistently improve visual reasoning capabilities, outperforming vanilla self-improvement baselines by an average of 3.86 points.</abstract>
<identifier type="citekey">guo-etal-2026-counteracting</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.1010/</url>
</location>
<!-- Position of the paper within the host volume: date plus first and last page. -->
<part>
<date>2026-07</date>
<extent unit="page">
<start>22104</start>
<end>22121</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Counteracting the Matthew Effect in Self-Improvement of LVLMs through Head-Tail Re-balancing
%A Guo, Xin
%A Xi, Zhiheng
%A Ding, Yiwen
%A Zhai, Yitao
%A Shi, Xiaowei
%A Cai, Xunliang
%A Gui, Tao
%A Zhang, Qi
%A Huang, Xuanjing
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F guo-etal-2026-counteracting
%X Self-improvement has emerged as a mainstream paradigm for advancing the reasoning capabilities of large vision–language models (LVLMs), where models explore and learn from successful trajectories iteratively. However, we identify a critical imbalance during this process: the model readily generates high-quality trajectories for simple queries (i.e., head data) but struggles with complex ones (i.e., tail data). This bias drives the optimization to disproportionately prioritize simple reasoning skills, while inhibiting the acquisition of complex capabilities. As iterations progress, this imbalance becomes more acute—a dynamic we term the “Matthew effect”, ultimately stalling performance gains. To mitigate this, we approach head-tail re-balance during the exploration-and-learning process from two perspectives: distribution-reshaping and trajectory-resampling. Extensive experiments on Qwen2-VL-7B-Instruct and InternVL2.5-4B models across visual reasoning tasks demonstrate that our methods consistently improve visual reasoning capabilities, outperforming vanilla self-improvement baselines by an average of 3.86 points.
%U https://aclanthology.org/2026.acl-long.1010/
%P 22104-22121
Markdown (Informal)
[Counteracting the Matthew Effect in Self-Improvement of LVLMs through Head-Tail Re-balancing](https://aclanthology.org/2026.acl-long.1010/) (Guo et al., ACL 2026)
ACL
- Xin Guo, Zhiheng Xi, Yiwen Ding, Yitao Zhai, Xiaowei Shi, Xunliang Cai, Tao Gui, Qi Zhang, and Xuanjing Huang. 2026. Counteracting the Matthew Effect in Self-Improvement of LVLMs through Head-Tail Re-balancing. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 22104–22121, San Diego, California, United States. Association for Computational Linguistics.