@comment{Workshop paper in the StereACuLT 2026 proceedings; case-sensitive words in title and booktitle are brace-protected as whole words.}
@inproceedings{kim-huh-2026-translation,
  title     = {Translation Is Not Representation: {English}-Hub Routing in Cross-Lingual Bias Benchmarks},
  author    = {Kim, Hak Hyun and
               Huh, Benjamin},
  editor    = {Ma, Weicheng and
               Vosoughi, Soroush and
               Gillani, Nabeel and
               Coto-Solano, Rolando},
  booktitle = {Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies ({StereACuLT} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.stereacult-1.11/},
  pages     = {116--125},
  isbn      = {979-8-89176-408-8},
  abstract  = {Cross-lingual bias benchmarks such as JBBQ and KoBBQ translate English bias probes and compare scores across languages, assuming the translated probe measures the same construct. We test this assumption at the representation and behavioral levels using 13B-parameter models matched on architecture but differing in language-training regime. A multi-anchor logit lens shows that an English-centric model (Llama 2) processes Japanese and Korean inputs predominantly through English-script predictions in its middle layers, even where Centered Kernel Alignment (CKA) between languages is high: geometric convergence masks English-hub routing. Matched continual-adaptation comparisons show that target-language adaptation reduces this English-script mass: from 0.77 to 0.56 after Japanese adaptation (Swallow), and from 0.78 to 0.71 after Korean adaptation (koen), while balanced bilingual pretraining (LLM-jp) lowers it further to 0.19. Behaviorally, every model is more stereotype-biased in English than in Japanese, with gaps from 0.13 to 0.14, but this asymmetry is language-specific: in Korean it is weak and disappears after Korean adaptation, with Korean nearly as stereotype-leaning as English. Yet patching English hub states into target-language processing does not transplant this bias. Cross-lingual bias scores thus reflect genuine language-specific behavior, not an English-pivot artifact, even though the underlying representations are not comparable. We distill this dissociation between representation and behavior into a four-step audit protocol for translated bias benchmarks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single paper; the ID matches the citekey identifier further down. -->
  <mods ID="kim-huh-2026-translation">
    <!-- Paper title -->
    <titleInfo>
      <title>Translation Is Not Representation: English-Hub Routing in Cross-Lingual Bias Benchmarks</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Hak</namePart>
      <namePart type="given">Hyun</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Benjamin</namePart>
      <namePart type="family">Huh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Weicheng</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Soroush</namePart>
        <namePart type="family">Vosoughi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nabeel</namePart>
        <namePart type="family">Gillani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rolando</namePart>
        <namePart type="family">Coto-Solano</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-408-8</identifier>
    </relatedItem>
    <!-- Abstract is kept on one line so its text content is unchanged -->
    <abstract>Cross-lingual bias benchmarks such as JBBQ and KoBBQ translate English bias probes and compare scores across languages, assuming the translated probe measures the same construct. We test this assumption at the representation and behavioral levels using 13B-parameter models matched on architecture but differing in language-training regime. A multi-anchor logit lens shows that an English-centric model (Llama 2) processes Japanese and Korean inputs predominantly through English-script predictions in its middle layers, even where Centered Kernel Alignment (CKA) between languages is high: geometric convergence masks English-hub routing. Matched continual-adaptation comparisons show that target-language adaptation reduces this English-script mass: from 0.77 to 0.56 after Japanese adaptation (Swallow), and from 0.78 to 0.71 after Korean adaptation (koen), while balanced bilingual pretraining (LLM-jp) lowers it further to 0.19. Behaviorally, every model is more stereotype-biased in English than in Japanese, with gaps from 0.13 to 0.14, but this asymmetry is language-specific: in Korean it is weak and disappears after Korean adaptation, with Korean nearly as stereotype-leaning as English. Yet patching English hub states into target-language processing does not transplant this bias. Cross-lingual bias scores thus reflect genuine language-specific behavior, not an English-pivot artifact, even though the underlying representations are not comparable. We distill this dissociation between representation and behavior into a four-step audit protocol for translated bias benchmarks.</abstract>
    <identifier type="citekey">kim-huh-2026-translation</identifier>
    <location>
      <url>https://aclanthology.org/2026.stereacult-1.11/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>116</start>
        <end>125</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Translation Is Not Representation: English-Hub Routing in Cross-Lingual Bias Benchmarks
%A Kim, Hak Hyun
%A Huh, Benjamin
%Y Ma, Weicheng
%Y Vosoughi, Soroush
%Y Gillani, Nabeel
%Y Coto-Solano, Rolando
%S Proceedings of the 1st Workshop on Stereotypes Across Cultures in Language Technologies (StereACuLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-408-8
%F kim-huh-2026-translation
%X Cross-lingual bias benchmarks such as JBBQ and KoBBQ translate English bias probes and compare scores across languages, assuming the translated probe measures the same construct. We test this assumption at the representation and behavioral levels using 13B-parameter models matched on architecture but differing in language-training regime. A multi-anchor logit lens shows that an English-centric model (Llama 2) processes Japanese and Korean inputs predominantly through English-script predictions in its middle layers, even where Centered Kernel Alignment (CKA) between languages is high: geometric convergence masks English-hub routing. Matched continual-adaptation comparisons show that target-language adaptation reduces this English-script mass: from 0.77 to 0.56 after Japanese adaptation (Swallow), and from 0.78 to 0.71 after Korean adaptation (koen), while balanced bilingual pretraining (LLM-jp) lowers it further to 0.19. Behaviorally, every model is more stereotype-biased in English than in Japanese, with gaps from 0.13 to 0.14, but this asymmetry is language-specific: in Korean it is weak and disappears after Korean adaptation, with Korean nearly as stereotype-leaning as English. Yet patching English hub states into target-language processing does not transplant this bias. Cross-lingual bias scores thus reflect genuine language-specific behavior, not an English-pivot artifact, even though the underlying representations are not comparable. We distill this dissociation between representation and behavior into a four-step audit protocol for translated bias benchmarks.
%U https://aclanthology.org/2026.stereacult-1.11/
%P 116-125
Markdown (Informal)
[Translation Is Not Representation: English-Hub Routing in Cross-Lingual Bias Benchmarks](https://aclanthology.org/2026.stereacult-1.11/) (Kim & Huh, StereACuLT 2026)
ACL