@inproceedings{liu-etal-2025-mixture,
title = "Mixture of insigh{T}ful Experts ({M}o{TE}): The Synergy of Reasoning Chains and Expert Mixtures in Self-Alignment",
author = "Liu, Zhili and
Gou, Yunhao and
Chen, Kai and
Hong, Lanqing and
Gao, Jiahui and
Mi, Fei and
Zhang, Yu and
Li, Zhenguo and
Jiang, Xin and
Liu, Qun and
Kwok, James",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.151/",
doi = "10.18653/v1/2025.acl-long.151",
pages = "3022--3038",
ISBN = "979-8-89176-251-0",
abstract = "As the capabilities of large language models (LLMs) continue to expand, aligning these models with human values remains a significant challenge. Recent studies show that reasoning abilities contribute significantly to model safety, while integrating Mixture-of-Experts (MoE) architectures can further enhance alignment.In this work, we address a fundamental question:\textit{How to effectively incorporate reasoning abilitiesand MoE architectures into self-alignment processin LLMs?}We propose Mixture of insighTful Experts (MoTE), a novel framework that synergistically combines reasoning chains and expert mixtures to improve self-alignments.From a data perspective, MoTE employs a structured reasoning chain comprising four key stages: Question Analysis, Answer Guidance, Safe Answer, and Safety Checking. This approach enhances safety through multi-step reasoning and proves effective even for smaller and less powerful LLMs (e.g., 7B models). From an architectural perspective, MoTE adopts a multi-LoRA framework with step-level routing, where each expert is dedicated to a specific reasoning step. This design eliminates the need for balance losses, ensures stable training, and supports adaptive inference lengths. Experimental results demonstrate that MoTE significantly improves model safety, jailbreak resistance, and over-refusal capabilities, achieving performance comparable to OpenAI{'}s state-of-the-art o1 model."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2025-mixture">
<titleInfo>
<title>Mixture of insighTful Experts (MoTE): The Synergy of Reasoning Chains and Expert Mixtures in Self-Alignment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhili</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunhao</namePart>
<namePart type="family">Gou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lanqing</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahui</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">Mi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenguo</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qun</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Kwok</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>As the capabilities of large language models (LLMs) continue to expand, aligning these models with human values remains a significant challenge. Recent studies show that reasoning abilities contribute significantly to model safety, while integrating Mixture-of-Experts (MoE) architectures can further enhance alignment.In this work, we address a fundamental question:How to effectively incorporate reasoning abilitiesand MoE architectures into self-alignment processin LLMs?We propose Mixture of insighTful Experts (MoTE), a novel framework that synergistically combines reasoning chains and expert mixtures to improve self-alignments.From a data perspective, MoTE employs a structured reasoning chain comprising four key stages: Question Analysis, Answer Guidance, Safe Answer, and Safety Checking. This approach enhances safety through multi-step reasoning and proves effective even for smaller and less powerful LLMs (e.g., 7B models). From an architectural perspective, MoTE adopts a multi-LoRA framework with step-level routing, where each expert is dedicated to a specific reasoning step. This design eliminates the need for balance losses, ensures stable training, and supports adaptive inference lengths. Experimental results demonstrate that MoTE significantly improves model safety, jailbreak resistance, and over-refusal capabilities, achieving performance comparable to OpenAI’s state-of-the-art o1 model.</abstract>
<identifier type="citekey">liu-etal-2025-mixture</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.151</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.151/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>3022</start>
<end>3038</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mixture of insighTful Experts (MoTE): The Synergy of Reasoning Chains and Expert Mixtures in Self-Alignment
%A Liu, Zhili
%A Gou, Yunhao
%A Chen, Kai
%A Hong, Lanqing
%A Gao, Jiahui
%A Mi, Fei
%A Zhang, Yu
%A Li, Zhenguo
%A Jiang, Xin
%A Liu, Qun
%A Kwok, James
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F liu-etal-2025-mixture
%X As the capabilities of large language models (LLMs) continue to expand, aligning these models with human values remains a significant challenge. Recent studies show that reasoning abilities contribute significantly to model safety, while integrating Mixture-of-Experts (MoE) architectures can further enhance alignment.In this work, we address a fundamental question:How to effectively incorporate reasoning abilitiesand MoE architectures into self-alignment processin LLMs?We propose Mixture of insighTful Experts (MoTE), a novel framework that synergistically combines reasoning chains and expert mixtures to improve self-alignments.From a data perspective, MoTE employs a structured reasoning chain comprising four key stages: Question Analysis, Answer Guidance, Safe Answer, and Safety Checking. This approach enhances safety through multi-step reasoning and proves effective even for smaller and less powerful LLMs (e.g., 7B models). From an architectural perspective, MoTE adopts a multi-LoRA framework with step-level routing, where each expert is dedicated to a specific reasoning step. This design eliminates the need for balance losses, ensures stable training, and supports adaptive inference lengths. Experimental results demonstrate that MoTE significantly improves model safety, jailbreak resistance, and over-refusal capabilities, achieving performance comparable to OpenAI’s state-of-the-art o1 model.
%R 10.18653/v1/2025.acl-long.151
%U https://aclanthology.org/2025.acl-long.151/
%U https://doi.org/10.18653/v1/2025.acl-long.151
%P 3022-3038
Markdown (Informal)
[Mixture of insighTful Experts (MoTE): The Synergy of Reasoning Chains and Expert Mixtures in Self-Alignment](https://aclanthology.org/2025.acl-long.151/) (Liu et al., ACL 2025)
ACL
- Zhili Liu, Yunhao Gou, Kai Chen, Lanqing Hong, Jiahui Gao, Fei Mi, Yu Zhang, Zhenguo Li, Xin Jiang, Qun Liu, and James Kwok. 2025. Mixture of insighTful Experts (MoTE): The Synergy of Reasoning Chains and Expert Mixtures in Self-Alignment. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 3022–3038, Vienna, Austria. Association for Computational Linguistics.