@inproceedings{tang-etal-2026-specialization,
title = "Specialization through Collaboration: Understanding Expert Interaction in Mixture-of-Expert Large Language Models",
author = "Tang, Yuanbo and
Zhang, Naifan and
Tang, Yan and
Chen, Meixuan and
Huang, Shuhan and
Cao, Tingyu and
Li, Yang",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.104/",
pages = "2326--2339",
ISBN = "979-8-89176-380-7",
abstract = "Mixture-of-Experts (MoE) based large language models (LLMs) have gained popularity due to their multi-task capability, where each input token activates only a subset of ``expert'' subnetworks. However, whether each expert can truly specialize to a certain task remains poorly understood, while activation analysis shows frequent cross-layer co-activation of experts for the same input, resembling a collaborative behavior. In this paper, we use a dictionary learning approach to show that experts in MoE LLMs form hierarchical and semantically coherent collaborative groups that correspond to specific linguistic and cognitive functions (e.g., mathematical reasoning, syntactic processing), mirroring specialized functional region observed in neuroscience. Furthermore, leveraging these discovered expert groups enables significant model compression with minimal performance degradation, outperforming existing methods by 2.5{\%} while enabling up to 50{\%} expert reduction. These findings provide the first systematic analysis of expert collaboration mechanisms in MoE LLMs, revealing that specialization emerges from joint activation of experts across all layers. We further developed an interactive visualization platform that enables researchers to explore expert collaboration patterns and their semantic associations."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tang-etal-2026-specialization">
<titleInfo>
<title>Specialization through Collaboration: Understanding Expert Interaction in Mixture-of-Expert Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuanbo</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naifan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meixuan</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuhan</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tingyu</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>Mixture-of-Experts (MoE) based large language models (LLMs) have gained popularity due to their multi-task capability, where each input token activates only a subset of “expert” subnetworks. However, whether each expert can truly specialize to a certain task remains poorly understood, while activation analysis shows frequent cross-layer co-activation of experts for the same input, resembling a collaborative behavior. In this paper, we use a dictionary learning approach to show that experts in MoE LLMs form hierarchical and semantically coherent collaborative groups that correspond to specific linguistic and cognitive functions (e.g., mathematical reasoning, syntactic processing), mirroring specialized functional region observed in neuroscience. Furthermore, leveraging these discovered expert groups enables significant model compression with minimal performance degradation, outperforming existing methods by 2.5% while enabling up to 50% expert reduction. These findings provide the first systematic analysis of expert collaboration mechanisms in MoE LLMs, revealing that specialization emerges from joint activation of experts across all layers. We further developed an interactive visualization platform that enables researchers to explore expert collaboration patterns and their semantic associations.</abstract>
<identifier type="citekey">tang-etal-2026-specialization</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.104/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>2326</start>
<end>2339</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Specialization through Collaboration: Understanding Expert Interaction in Mixture-of-Expert Large Language Models
%A Tang, Yuanbo
%A Zhang, Naifan
%A Tang, Yan
%A Chen, Meixuan
%A Huang, Shuhan
%A Cao, Tingyu
%A Li, Yang
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F tang-etal-2026-specialization
%X Mixture-of-Experts (MoE) based large language models (LLMs) have gained popularity due to their multi-task capability, where each input token activates only a subset of “expert” subnetworks. However, whether each expert truly specializes in a certain task remains poorly understood; meanwhile, activation analysis shows frequent cross-layer co-activation of experts for the same input, resembling collaborative behavior. In this paper, we use a dictionary learning approach to show that experts in MoE LLMs form hierarchical and semantically coherent collaborative groups that correspond to specific linguistic and cognitive functions (e.g., mathematical reasoning, syntactic processing), mirroring the specialized functional regions observed in neuroscience. Furthermore, leveraging these discovered expert groups enables significant model compression with minimal performance degradation, outperforming existing methods by 2.5% while enabling up to 50% expert reduction. These findings provide the first systematic analysis of expert collaboration mechanisms in MoE LLMs, revealing that specialization emerges from the joint activation of experts across all layers. We further develop an interactive visualization platform that enables researchers to explore expert collaboration patterns and their semantic associations.
%U https://aclanthology.org/2026.eacl-long.104/
%P 2326-2339
Markdown (Informal)
[Specialization through Collaboration: Understanding Expert Interaction in Mixture-of-Expert Large Language Models](https://aclanthology.org/2026.eacl-long.104/) (Tang et al., EACL 2026)
ACL
Yuanbo Tang, Naifan Zhang, Yan Tang, Meixuan Chen, Shuhan Huang, Tingyu Cao, and Yang Li. 2026. [Specialization through Collaboration: Understanding Expert Interaction in Mixture-of-Expert Large Language Models](https://aclanthology.org/2026.eacl-long.104/). In *Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 2326–2339, Rabat, Morocco. Association for Computational Linguistics.
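The abstract describes a dictionary-learning analysis of cross-layer expert co-activation, but this record contains no code. The sketch below is a minimal, hypothetical illustration of that general idea only — not the authors' implementation or data. The synthetic `routing` matrix, the use of scikit-learn's `DictionaryLearning`, the number of dictionary atoms, and the layer/expert sizes are all assumptions made for illustration.

```python
# Hypothetical sketch: recover candidate expert "collaboration groups" from
# cross-layer co-activation via sparse dictionary learning. The routing matrix
# below is synthetic; a real analysis would collect per-token router decisions
# from an MoE LLM, which is model-specific and not shown here.
import numpy as np
from sklearn.decomposition import DictionaryLearning

rng = np.random.default_rng(0)
num_tokens, num_layers, num_experts = 2048, 4, 8

# Synthetic stand-in for routing traces: entry (t, j) is 1.0 if flattened
# expert j (layer * num_experts + index) fired for token t.
routing = (rng.random((num_tokens, num_layers * num_experts)) < 0.25).astype(float)

# Learn a sparse dictionary: each atom is a cross-layer pattern of experts that
# tend to fire together; each token is coded as a sparse mix of those patterns.
dl = DictionaryLearning(n_components=16, alpha=1.0, max_iter=200, random_state=0)
codes = dl.fit_transform(routing)   # shape: (num_tokens, 16)
atoms = dl.components_              # shape: (16, num_layers * num_experts)

# Inspect one candidate group: the (layer, expert) pairs with the largest
# weights in the first atom.
atom = np.abs(atoms[0]).reshape(num_layers, num_experts)
flat_top = np.argsort(atom, axis=None)[::-1][:5]
pairs = [tuple(int(i) for i in np.unravel_index(k, atom.shape)) for k in flat_top]
print("atom 0 top (layer, expert) pairs:", pairs)
```

On real routing traces, atoms with large weights concentrated on a few experts across layers would be the natural candidates for the semantically coherent groups the abstract refers to; with the random data above the output is only a demonstration of the mechanics.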