% ACL Anthology BibTeX record for paper 2026.acl-long.1912 (citekey zong-etal-2026-one).
% Quotation marks inside the abstract use LaTeX ``...'' pairs so they typeset balanced.
@inproceedings{zong-etal-2026-one,
    title = "One Pair Suffices: Unlocking Universal Zero-Shot Translation via Cross-Architecture Alignment",
    author = "Zong, Hao and
      Yuan, Cong Hu and
      Bei, Chao and
      Chen, Wentao and
      Liu, Huan and
      Huang, Kaiyu and
      Huang, Degen",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.acl-long.1912/",
    pages = "41227--41237",
    ISBN = "979-8-89176-390-6",
    abstract = "Current paradigms for empowering Large Language Models (LLMs) with multilingual capabilities rely heavily on massive instruction tuning. We challenge this view, proposing that the barrier is topological alignment, not data quantity. We introduce Hybrid Cross-Alignment (HCA), fusing a frozen NLLB encoder with a Qwen decoder via a closed-loop dual-adapter architecture. HCA utilizes a Source-Side Adapter to precondition encoder features and a Query-Residual Adapter to preserve generative stability, bridged by an adaptive gated cross-modal interface. Our core discovery is ``Universal Alignment Generalization.'' We demonstrate that training HCA on a single language pair (German-English) unlocks state-of-the-art zero-shot transfer to dozens of unseen languages. Crucially, our ``Oracle'' experiments reveal that this single-pair training recovers over 96.7{\%} of the performance achievable by training on all available pairs. This proves that a universal, language-agnostic projection protocol exists. With a total inference footprint of 5.25B parameters, our model significantly outperforms larger baselines, surpassing TowerPlus-9B (+9.0 COMET on low-resource languages) and Aya-101 (13B). Furthermore, performance scales linearly with encoder size; upgrading from 600M to 1.3B yields immediate gains (+3.4 points on Gujarati) with minimal retraining cost."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey zong-etal-2026-one. -->
  <mods ID="zong-etal-2026-one">
    <titleInfo>
      <title>One Pair Suffices: Unlocking Universal Zero-Shot Translation via Cross-Architecture Alignment</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Hao</namePart>
      <namePart type="family">Zong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cong</namePart>
      <namePart type="given">Hu</namePart>
      <namePart type="family">Yuan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chao</namePart>
      <namePart type="family">Bei</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wentao</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Huan</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kaiyu</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Degen</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <!-- Abstract text; quoted terms carry both opening and closing curly quotes. -->
    <abstract>Current paradigms for empowering Large Language Models (LLMs) with multilingual capabilities rely heavily on massive instruction tuning. We challenge this view, proposing that the barrier is topological alignment, not data quantity. We introduce Hybrid Cross-Alignment (HCA), fusing a frozen NLLB encoder with a Qwen decoder via a closed-loop dual-adapter architecture. HCA utilizes a Source-Side Adapter to precondition encoder features and a Query-Residual Adapter to preserve generative stability, bridged by an adaptive gated cross-modal interface. Our core discovery is “Universal Alignment Generalization.” We demonstrate that training HCA on a single language pair (German-English) unlocks state-of-the-art zero-shot transfer to dozens of unseen languages. Crucially, our “Oracle” experiments reveal that this single-pair training recovers over 96.7% of the performance achievable by training on all available pairs. This proves that a universal, language-agnostic projection protocol exists. With a total inference footprint of 5.25B parameters, our model significantly outperforms larger baselines, surpassing TowerPlus-9B (+9.0 COMET on low-resource languages) and Aya-101 (13B). Furthermore, performance scales linearly with encoder size; upgrading from 600M to 1.3B yields immediate gains (+3.4 points on Gujarati) with minimal retraining cost.</abstract>
    <identifier type="citekey">zong-etal-2026-one</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.1912/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>41227</start>
        <end>41237</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T One Pair Suffices: Unlocking Universal Zero-Shot Translation via Cross-Architecture Alignment
%A Zong, Hao
%A Yuan, Cong Hu
%A Bei, Chao
%A Chen, Wentao
%A Liu, Huan
%A Huang, Kaiyu
%A Huang, Degen
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F zong-etal-2026-one
%X Current paradigms for empowering Large Language Models (LLMs) with multilingual capabilities rely heavily on massive instruction tuning. We challenge this view, proposing that the barrier is topological alignment, not data quantity. We introduce Hybrid Cross-Alignment (HCA), fusing a frozen NLLB encoder with a Qwen decoder via a closed-loop dual-adapter architecture. HCA utilizes a Source-Side Adapter to precondition encoder features and a Query-Residual Adapter to preserve generative stability, bridged by an adaptive gated cross-modal interface. Our core discovery is “Universal Alignment Generalization.” We demonstrate that training HCA on a single language pair (German-English) unlocks state-of-the-art zero-shot transfer to dozens of unseen languages. Crucially, our “Oracle” experiments reveal that this single-pair training recovers over 96.7% of the performance achievable by training on all available pairs. This proves that a universal, language-agnostic projection protocol exists. With a total inference footprint of 5.25B parameters, our model significantly outperforms larger baselines, surpassing TowerPlus-9B (+9.0 COMET on low-resource languages) and Aya-101 (13B). Furthermore, performance scales linearly with encoder size; upgrading from 600M to 1.3B yields immediate gains (+3.4 points on Gujarati) with minimal retraining cost.
%U https://aclanthology.org/2026.acl-long.1912/
%P 41227-41237
Markdown (Informal)
[One Pair Suffices: Unlocking Universal Zero-Shot Translation via Cross-Architecture Alignment](https://aclanthology.org/2026.acl-long.1912/) (Zong et al., ACL 2026)
ACL
- Hao Zong, Cong Hu Yuan, Chao Bei, Wentao Chen, Huan Liu, Kaiyu Huang, and Degen Huang. 2026. One Pair Suffices: Unlocking Universal Zero-Shot Translation via Cross-Architecture Alignment. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 41227–41237, San Diego, California, United States. Association for Computational Linguistics.