% Workshop paper entry (BlackboxNLP 2025 proceedings). Text outside an entry is ignored by BibTeX.
% Mixed-case identifiers in booktitle are brace-protected so styles that recase titles keep them intact.
@inproceedings{shani-basirat-2025-language,
  title     = {Language Dominance in Multilingual Large Language Models},
  author    = {Shani, Nadav and Basirat, Ali},
  editor    = {Belinkov, Yonatan and
               Mueller, Aaron and
               Kim, Najoung and
               Mohebbi, Hosein and
               Chen, Hanjie and
               Arad, Dana and
               Sarti, Gabriele},
  booktitle = {Proceedings of the 8th {BlackboxNLP} Workshop: Analyzing and Interpreting Neural Networks for {NLP}},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.blackboxnlp-1.7/},
  pages     = {137--148},
  isbn      = {979-8-89176-346-3},
  abstract  = {This paper investigates the language dominance hypothesis in multilingual large language models (LLMs), which posits that cross-lingual understanding is facilitated by an implicit translation into a dominant language seen more frequently during pretraining. We propose a novel approach to quantify how languages influence one another in a language model. By analyzing the hidden states across intermediate layers of language models, we model interactions between language-specific embedding spaces using Gaussian Mixture Models. Our results reveal only weak signs of language dominance in middle layers, affecting only a fraction of tokens. Our findings suggest that multilingual processing in LLMs is better explained by language-specific and shared representational spaces rather than internal translation into a single dominant language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey shani-basirat-2025-language: the paper's own metadata, with the host proceedings nested under relatedItem type="host". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="shani-basirat-2025-language">
    <!-- Paper title and authors -->
    <titleInfo>
      <title>Language Dominance in Multilingual Large Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nadav</namePart>
      <namePart type="family">Shani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ali</namePart>
      <namePart type="family">Basirat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host volume: proceedings title, editors, publisher, place, ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yonatan</namePart>
        <namePart type="family">Belinkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aaron</namePart>
        <namePart type="family">Mueller</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Najoung</namePart>
        <namePart type="family">Kim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hosein</namePart>
        <namePart type="family">Mohebbi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hanjie</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dana</namePart>
        <namePart type="family">Arad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gabriele</namePart>
        <namePart type="family">Sarti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-346-3</identifier>
    </relatedItem>
    <abstract>This paper investigates the language dominance hypothesis in multilingual large language models (LLMs), which posits that cross-lingual understanding is facilitated by an implicit translation into a dominant language seen more frequently during pretraining. We propose a novel approach to quantify how languages influence one another in a language model. By analyzing the hidden states across intermediate layers of language models, we model interactions between language-specific embedding spaces using Gaussian Mixture Models. Our results reveal only weak signs of language dominance in middle layers, affecting only a fraction of tokens. Our findings suggest that multilingual processing in LLMs is better explained by language-specific and shared representational spaces rather than internal translation into a single dominant language.</abstract>
    <identifier type="citekey">shani-basirat-2025-language</identifier>
    <location>
      <url>https://aclanthology.org/2025.blackboxnlp-1.7/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>137</start>
        <end>148</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Language Dominance in Multilingual Large Language Models
%A Shani, Nadav
%A Basirat, Ali
%Y Belinkov, Yonatan
%Y Mueller, Aaron
%Y Kim, Najoung
%Y Mohebbi, Hosein
%Y Chen, Hanjie
%Y Arad, Dana
%Y Sarti, Gabriele
%S Proceedings of the 8th BlackboxNLP Workshop: Analyzing and Interpreting Neural Networks for NLP
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-346-3
%F shani-basirat-2025-language
%X This paper investigates the language dominance hypothesis in multilingual large language models (LLMs), which posits that cross-lingual understanding is facilitated by an implicit translation into a dominant language seen more frequently during pretraining. We propose a novel approach to quantify how languages influence one another in a language model. By analyzing the hidden states across intermediate layers of language models, we model interactions between language-specific embedding spaces using Gaussian Mixture Models. Our results reveal only weak signs of language dominance in middle layers, affecting only a fraction of tokens. Our findings suggest that multilingual processing in LLMs is better explained by language-specific and shared representational spaces rather than internal translation into a single dominant language.
%U https://aclanthology.org/2025.blackboxnlp-1.7/
%P 137-148
Markdown (Informal)
[Language Dominance in Multilingual Large Language Models](https://aclanthology.org/2025.blackboxnlp-1.7/) (Shani & Basirat, BlackboxNLP 2025)
ACL