@inproceedings{corrado-etal-2025-automixalign,
title = "{A}uto{M}ix{A}lign: Adaptive Data Mixing for Multi-Task Preference Optimization in {LLM}s",
author = "Corrado, Nicholas E. and
Katz-Samuels, Julian and
Devraj, Adithya M and
Yun, Hyokun and
Zhang, Chao and
Xu, Yi and
Pan, Yi and
Yin, Bing and
Chilimbi, Trishul",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.990/",
doi = "10.18653/v1/2025.acl-long.990",
pages = "20234--20258",
ISBN = "979-8-89176-251-0",
abstract = "When aligning large language models (LLMs), their performance across various tasks (such as being helpful, harmless, and honest) is heavily influenced by the composition of the training data. However, it is difficult to determine what mixture of data should be used to produce a model with strong performance across all tasks. Existing approaches rely on large ablation studies, heuristics, or human intuition, though these can be prohibitively expensive and suboptimal. We study this problem in the context of preference optimization via DPO and propose a novel and theoretically justified algorithm, AutoMixAlign (AMA), that adaptively mixes datasets during LLM training to balance performance across multiple tasks. AMA first trains \textit{specialist models} for each task to determine losses that corresponding to strong task performance. Next, AMA trains a generalist model using a novel minimax optimization that prioritizes tasks for which generalist model losses are furthest from specialist model losses. We introduce two algorithms to optimize this problem: (1) AMA-R adaptively reweights the objective to prioritize tasks, and (2) AMA-S adaptively adjusts how much data is sampled from each task to prioritize tasks. Both algorithms achieve a convergence rate of $O(1/\sqrt{T})$ in the convex case. AMA-R{'}s convergence result immediately follows from Sagawa et. al, 2019, and we provide a convergence proof for AMA-S using techniques from online learning such as EXP3 (Auer et. al, 2002). We evaluate AMA on several multitask alignment setups, and observe that AMA outperforms the standard alignment approach which simply optimizes the total loss across all tasks and also outperforms model-merging methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corrado-etal-2025-automixalign">
<titleInfo>
<title>AutoMixAlign: Adaptive Data Mixing for Multi-Task Preference Optimization in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Corrado</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julian</namePart>
<namePart type="family">Katz-Samuels</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adithya</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Devraj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyokun</namePart>
<namePart type="family">Yun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Pan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trishul</namePart>
<namePart type="family">Chilimbi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>When aligning large language models (LLMs), their performance across various tasks (such as being helpful, harmless, and honest) is heavily influenced by the composition of the training data. However, it is difficult to determine what mixture of data should be used to produce a model with strong performance across all tasks. Existing approaches rely on large ablation studies, heuristics, or human intuition, though these can be prohibitively expensive and suboptimal. We study this problem in the context of preference optimization via DPO and propose a novel and theoretically justified algorithm, AutoMixAlign (AMA), that adaptively mixes datasets during LLM training to balance performance across multiple tasks. AMA first trains specialist models for each task to determine losses that correspond to strong task performance. Next, AMA trains a generalist model using a novel minimax optimization that prioritizes tasks for which generalist model losses are furthest from specialist model losses. We introduce two algorithms to optimize this problem: (1) AMA-R adaptively reweights the objective to prioritize tasks, and (2) AMA-S adaptively adjusts how much data is sampled from each task to prioritize tasks. Both algorithms achieve a convergence rate of O(1/√T) in the convex case. AMA-R’s convergence result follows immediately from Sagawa et al. (2019), and we provide a convergence proof for AMA-S using techniques from online learning such as EXP3 (Auer et al., 2002). We evaluate AMA on several multitask alignment setups and observe that AMA outperforms the standard alignment approach, which simply optimizes the total loss across all tasks, and also outperforms model-merging methods.</abstract>
<identifier type="citekey">corrado-etal-2025-automixalign</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.990</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.990/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>20234</start>
<end>20258</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AutoMixAlign: Adaptive Data Mixing for Multi-Task Preference Optimization in LLMs
%A Corrado, Nicholas E.
%A Katz-Samuels, Julian
%A Devraj, Adithya M.
%A Yun, Hyokun
%A Zhang, Chao
%A Xu, Yi
%A Pan, Yi
%A Yin, Bing
%A Chilimbi, Trishul
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F corrado-etal-2025-automixalign
%X When aligning large language models (LLMs), their performance across various tasks (such as being helpful, harmless, and honest) is heavily influenced by the composition of the training data. However, it is difficult to determine what mixture of data should be used to produce a model with strong performance across all tasks. Existing approaches rely on large ablation studies, heuristics, or human intuition, though these can be prohibitively expensive and suboptimal. We study this problem in the context of preference optimization via DPO and propose a novel and theoretically justified algorithm, AutoMixAlign (AMA), that adaptively mixes datasets during LLM training to balance performance across multiple tasks. AMA first trains specialist models for each task to determine losses that correspond to strong task performance. Next, AMA trains a generalist model using a novel minimax optimization that prioritizes tasks for which generalist model losses are furthest from specialist model losses. We introduce two algorithms to optimize this problem: (1) AMA-R adaptively reweights the objective to prioritize tasks, and (2) AMA-S adaptively adjusts how much data is sampled from each task to prioritize tasks. Both algorithms achieve a convergence rate of O(1/√T) in the convex case. AMA-R’s convergence result follows immediately from Sagawa et al. (2019), and we provide a convergence proof for AMA-S using techniques from online learning such as EXP3 (Auer et al., 2002). We evaluate AMA on several multitask alignment setups and observe that AMA outperforms the standard alignment approach, which simply optimizes the total loss across all tasks, and also outperforms model-merging methods.
%R 10.18653/v1/2025.acl-long.990
%U https://aclanthology.org/2025.acl-long.990/
%U https://doi.org/10.18653/v1/2025.acl-long.990
%P 20234-20258
Markdown (Informal)
[AutoMixAlign: Adaptive Data Mixing for Multi-Task Preference Optimization in LLMs](https://aclanthology.org/2025.acl-long.990/) (Corrado et al., ACL 2025)
ACL
- Nicholas E. Corrado, Julian Katz-Samuels, Adithya M Devraj, Hyokun Yun, Chao Zhang, Yi Xu, Yi Pan, Bing Yin, and Trishul Chilimbi. 2025. AutoMixAlign: Adaptive Data Mixing for Multi-Task Preference Optimization in LLMs. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 20234–20258, Vienna, Austria. Association for Computational Linguistics.
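
To make the abstract's description concrete, below is a minimal, hypothetical sketch of the AMA-S idea it outlines: maintain EXP3-style sampling weights over tasks and steer training toward tasks whose generalist loss most exceeds the corresponding specialist model's loss. The function and parameter names (`ama_s_sketch`, `generalist_loss_fn`, `update_fn`, `eta`) are illustrative assumptions, not the paper's API, and the loop simplifies away details of the actual algorithm.

```python
import numpy as np

rng = np.random.default_rng(0)

def ama_s_sketch(task_batches, specialist_losses, generalist_loss_fn,
                 update_fn, steps=1000, eta=0.1):
    """Hypothetical AMA-S-style loop.

    task_batches: per-task batch iterators.
    specialist_losses: target loss achieved by each task's specialist model.
    generalist_loss_fn(batch) -> scalar loss of the generalist model.
    update_fn(batch): one optimizer step on the generalist model (min player).
    """
    k = len(task_batches)
    log_weights = np.zeros(k)  # uniform task sampling to start

    for _ in range(steps):
        # Softmax of the log-weights gives the current sampling distribution.
        probs = np.exp(log_weights - log_weights.max())
        probs /= probs.sum()

        i = int(rng.choice(k, p=probs))   # sample a task
        batch = next(task_batches[i])
        loss = generalist_loss_fn(batch)
        update_fn(batch)                  # minimize over model parameters

        # Excess loss over the specialist target is the "gain" for the max
        # player; the importance-weighted EXP3-style update upweights tasks
        # that lag furthest behind their specialists.
        excess = loss - specialist_losses[i]
        log_weights[i] += eta * excess / probs[i]

    return probs
```

The O(1/√T) convex-case guarantee cited in the abstract applies to the paper's actual algorithms and analysis, not to this simplified loop; the sketch only shows why prioritizing tasks by excess loss over specialist loss is a bandit-style (EXP3) sampling problem.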