@inproceedings{das-etal-2025-dpo,
title = "{DPO} Kernels: A Semantically-Aware, Kernel-Enhanced, and Divergence-Rich Paradigm for Direct Preference Optimization",
author = "Das, Amitava and
Trivedy, Suranjana and
Khanna, Danush and
Narsupalli, Yaswanth and
Ghosh, Basab and
Roy, Rajarshi and
Singh, Gurpreet and
Jain, Vinija and
Sharma, Vasu and
Reganti, Aishwarya Naresh and
Chadha, Aman",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1143/",
doi = "10.18653/v1/2025.findings-acl.1143",
pages = "22174--22270",
ISBN = "979-8-89176-256-5",
abstract = "The rapid advancement of large language models (LLMs) has revolutionized numerous applications, but presents significant challenges in aligning these models with diverse human values, ethical standards, and specific user preferences. Direct Preference Optimization (DPO) has become a cornerstone for preference alignment but is constrained by reliance on fixed divergence measures and limited feature transformations. We introduce \textbf{DPO-Kernels}, an innovative enhancement of DPO that integrates kernel methods to overcome these challenges through four key contributions: (i) \textbf{Kernelized Representations}: These representations enhance divergence measures by using polynomial, RBF, Mahalanobis, and spectral kernels for richer feature transformations. Additionally, we introduce a \textbf{hybrid loss} that combines embedding-based loss with probability-based loss; (ii) \textbf{Divergence Alternatives}: Beyond Kullback{--}Leibler (KL), we incorporate Jensen-Shannon, Hellinger, R{\'e}nyi, Bhattacharyya, Wasserstein, and other f-divergences to boost stability and robustness; (iii) \textbf{Data-Driven Selection}: Choosing the optimal kernel-divergence pair among 28 combinations (4 kernels $\times$ 7 divergences) is challenging. We introduce automatic metrics that analyze the data to select the best kernel-divergence pair, eliminating the need for manual tuning; (iv) \textbf{Hierarchical Mixture of Kernels (HMK)}: Combining local and global kernels for precise and large-scale semantic modeling. This approach automatically selects the optimal kernel mixture during training, enhancing modeling flexibility. DPO-Kernels achieve state-of-the-art generalization in factuality, safety, reasoning, and instruction following across 12 datasets. While alignment risks overfitting, Heavy-Tailed Self-Regularization (HT-SR) theory confirms that DPO-Kernels ensure robust generalization in LLMs. Comprehensive resources are available to facilitate further research and application of DPO-Kernels."
}