BibTeX
@inproceedings{oh-etal-2025-uncovering,
title = "Uncovering Factor-Level Preference to Improve Human-Model Alignment",
author = "Oh, Juhyun and
Kim, Eunsu and
Kim, Jiseon and
Xu, Wenda and
Cha, Inha and
Wang, William Yang and
Oh, Alice",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1045/",
doi = "10.18653/v1/2025.findings-emnlp.1045",
pages = "19179--19203",
ISBN = "979-8-89176-335-7",
abstract = "Large language models (LLMs) often exhibit tendencies that diverge from human preferences, such as favoring certain writing styles or producing overly verbose outputs. While crucial for improvement, identifying the factors driving these misalignments remains challenging due to existing evaluation methods' reliance on coarse-grained comparisons and lack of explainability.To address this, we introduce PROFILE, an automated framework to uncover and measure factor-level preference alignment of humans and LLMs.Using PROFILE, we analyze preference alignment across three key tasks: summarization, instruction-following, and document-based QA. We find a significant discrepancy: while LLMs show poor factor-level alignment with human preferences when generating texts, they demonstrate strong alignment in discrimination tasks. We demonstrate how leveraging the identified generation-discrimination gap can be used to improve LLM alignment through multiple approaches, including fine-tuning with self-guidance.Our work highlights the value of factor-level analysis for identifying hidden misalignments and provides a practical framework for improving LLM-human preference alignment."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="oh-etal-2025-uncovering">
    <titleInfo>
      <title>Uncovering Factor-Level Preference to Improve Human-Model Alignment</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Juhyun</namePart>
      <namePart type="family">Oh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eunsu</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiseon</namePart>
      <namePart type="family">Kim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenda</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Inha</namePart>
      <namePart type="family">Cha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">William</namePart>
      <namePart type="given">Yang</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alice</namePart>
      <namePart type="family">Oh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>Large language models (LLMs) often exhibit tendencies that diverge from human preferences, such as favoring certain writing styles or producing overly verbose outputs. While crucial for improvement, identifying the factors driving these misalignments remains challenging due to existing evaluation methods’ reliance on coarse-grained comparisons and lack of explainability. To address this, we introduce PROFILE, an automated framework to uncover and measure factor-level preference alignment of humans and LLMs. Using PROFILE, we analyze preference alignment across three key tasks: summarization, instruction-following, and document-based QA. We find a significant discrepancy: while LLMs show poor factor-level alignment with human preferences when generating texts, they demonstrate strong alignment in discrimination tasks. We demonstrate how leveraging the identified generation-discrimination gap can be used to improve LLM alignment through multiple approaches, including fine-tuning with self-guidance. Our work highlights the value of factor-level analysis for identifying hidden misalignments and provides a practical framework for improving LLM-human preference alignment.</abstract>
<identifier type="citekey">oh-etal-2025-uncovering</identifier>
<identifier type="doi">10.18653/v1/2025.findings-emnlp.1045</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1045/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>19179</start>
<end>19203</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Uncovering Factor-Level Preference to Improve Human-Model Alignment
%A Oh, Juhyun
%A Kim, Eunsu
%A Kim, Jiseon
%A Xu, Wenda
%A Cha, Inha
%A Wang, William Yang
%A Oh, Alice
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F oh-etal-2025-uncovering
%X Large language models (LLMs) often exhibit tendencies that diverge from human preferences, such as favoring certain writing styles or producing overly verbose outputs. While crucial for improvement, identifying the factors driving these misalignments remains challenging due to existing evaluation methods’ reliance on coarse-grained comparisons and lack of explainability. To address this, we introduce PROFILE, an automated framework to uncover and measure factor-level preference alignment of humans and LLMs. Using PROFILE, we analyze preference alignment across three key tasks: summarization, instruction-following, and document-based QA. We find a significant discrepancy: while LLMs show poor factor-level alignment with human preferences when generating texts, they demonstrate strong alignment in discrimination tasks. We demonstrate how leveraging the identified generation-discrimination gap can be used to improve LLM alignment through multiple approaches, including fine-tuning with self-guidance. Our work highlights the value of factor-level analysis for identifying hidden misalignments and provides a practical framework for improving LLM-human preference alignment.
%R 10.18653/v1/2025.findings-emnlp.1045
%U https://aclanthology.org/2025.findings-emnlp.1045/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.1045
%P 19179-19203
Markdown (Informal)
[Uncovering Factor-Level Preference to Improve Human-Model Alignment](https://aclanthology.org/2025.findings-emnlp.1045/) (Oh et al., Findings 2025)
ACL
Juhyun Oh, Eunsu Kim, Jiseon Kim, Wenda Xu, Inha Cha, William Yang Wang, and Alice Oh. 2025. Uncovering Factor-Level Preference to Improve Human-Model Alignment. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 19179–19203, Suzhou, China. Association for Computational Linguistics.