@comment{Entry for a paper in the ACL 2026 long-papers proceedings; the url field points to its ACL Anthology page.}
@inproceedings{yao-etal-2026-par,
  title     = {{PAR}: Training-Free Positional Perturbation and Attention Recycling for Faithful {OCR}},
  author    = {Yao, Yao and
               Liao, Manwen and
               Zhang, Weitian and
               Li, Zuchao and
               Zhao, Hai},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.1065/},
  pages     = {23258--23273},
  isbn      = {979-8-89176-390-6},
  abstract  = {In high-precision scenarios, vision language models suffer from Linguistic Priors Hallucination. When processing familiar text, models tend to over-rely on internal parametric knowledge, effectively ``reciting'' the content rather than ``reading'' the image. In this paper, we first systematically investigate this phenomenon by constructing the GlitchText Probing Dataset. We discover that the model{'}s reliance on visual grounding diminishes significantly as the generation length increases. To mitigate this, we propose PAR (Positional Perturbation and Attention Recycling), a training-free, inference-time intervention framework. PAR consists of two parts: (1) Positional Perturbation (PP) injects structured phase noise into the rotary positional embeddings; (2) Foveal Attention Recycling (FAR) detects over-confident linguistic priors and dynamically redistributes attention mass back to important visual regions. Extensive experiments across state-of-the-art models, demonstrate that PAR significantly reduces hallucination rates (reducing CER by 12{\%}), particularly in long-context scenarios, while maintaining robust generalization on standard benchmarks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citekey yao-etal-2026-par: paper-level metadata first,
       then the host proceedings (editors, publisher, place, ISBN) in relatedItem,
       followed by the abstract, URL and page extent. -->
  <mods ID="yao-etal-2026-par">
    <titleInfo>
      <title>PAR: Training-Free Positional Perturbation and Attention Recycling for Faithful OCR</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Yao</namePart>
      <namePart type="family">Yao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Manwen</namePart>
      <namePart type="family">Liao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Weitian</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zuchao</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hai</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>In high-precision scenarios, vision language models suffer from Linguistic Priors Hallucination. When processing familiar text, models tend to over-rely on internal parametric knowledge, effectively “reciting” the content rather than “reading” the image. In this paper, we first systematically investigate this phenomenon by constructing the GlitchText Probing Dataset. We discover that the model’s reliance on visual grounding diminishes significantly as the generation length increases. To mitigate this, we propose PAR (Positional Perturbation and Attention Recycling), a training-free, inference-time intervention framework. PAR consists of two parts: (1) Positional Perturbation (PP) injects structured phase noise into the rotary positional embeddings; (2) Foveal Attention Recycling (FAR) detects over-confident linguistic priors and dynamically redistributes attention mass back to important visual regions. Extensive experiments across state-of-the-art models, demonstrate that PAR significantly reduces hallucination rates (reducing CER by 12%), particularly in long-context scenarios, while maintaining robust generalization on standard benchmarks.</abstract>
    <identifier type="citekey">yao-etal-2026-par</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1065/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>23258</start>
        <end>23273</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T PAR: Training-Free Positional Perturbation and Attention Recycling for Faithful OCR
%A Yao, Yao
%A Liao, Manwen
%A Zhang, Weitian
%A Li, Zuchao
%A Zhao, Hai
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F yao-etal-2026-par
%X In high-precision scenarios, vision language models suffer from Linguistic Priors Hallucination. When processing familiar text, models tend to over-rely on internal parametric knowledge, effectively “reciting” the content rather than “reading” the image. In this paper, we first systematically investigate this phenomenon by constructing the GlitchText Probing Dataset. We discover that the model’s reliance on visual grounding diminishes significantly as the generation length increases. To mitigate this, we propose PAR (Positional Perturbation and Attention Recycling), a training-free, inference-time intervention framework. PAR consists of two parts: (1) Positional Perturbation (PP) injects structured phase noise into the rotary positional embeddings; (2) Foveal Attention Recycling (FAR) detects over-confident linguistic priors and dynamically redistributes attention mass back to important visual regions. Extensive experiments across state-of-the-art models, demonstrate that PAR significantly reduces hallucination rates (reducing CER by 12%), particularly in long-context scenarios, while maintaining robust generalization on standard benchmarks.
%U https://aclanthology.org/2026.acl-long.1065/
%P 23258-23273
Markdown (Informal)
[PAR: Training-Free Positional Perturbation and Attention Recycling for Faithful OCR](https://aclanthology.org/2026.acl-long.1065/) (Yao et al., ACL 2026)
ACL