% BibTeX record for the Findings of ACL 2026 paper "VALUE ALIGNMENT TAX".
% Braced words in title/booktitle keep their capitalisation under styles that
% recase titles; {---} and {--} in the abstract are em/en dashes.
@inproceedings{chen-shen-2026-value,
    title = "{VALUE} {ALIGNMENT} {TAX}: Measuring Value Trade-offs in {LLM} Alignment",
    author = "Chen, Jiajun and
      Shen, Hua",
    editor = "Liakata, Maria and
      Moreira, Viviane P. and
      Zhang, Jiajun and
      Jurgens, David",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
    month = jul,
    year = "2026",
    address = "San Diego, California, United States",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-acl.1749/",
    pages = "35046--35069",
    ISBN = "979-8-89176-395-1",
    abstract = "Existing work on value alignment typically characterizes value relations statically, ignoring how alignment interventions{---}such as prompting, fine-tuning, or preference optimization{---}reshape the broader value system. In practice, aligning a target value can implicitly shift other values, creating value trade-offs that remain largely unmeasured. We introduce the VAT, a framework that quantifies value trade-offs by measuring how alignment-induced changes propagate across interconnected values relative to achieved on-target gain. VAT captures the system-level dynamics of value expression under alignment intervention, enabling evaluation of both intended improvements and unintended side effects. Using a controlled scenario{--}action dataset grounded in Schwartz value theory, we collect paired pre{--}post normative judgments and analyze alignment effects across models, values, and interventions. Results show that alignment often produces uneven and structured co-movement among values, revealing systematic trade-offs between target and non-target values. These effects are largely invisible under conventional target-only evaluation, but become evident via VAT, highlighting process-level alignment risks and offering new insights into the dynamic nature of value alignment in LLMs. Dataset and code are open-sourced."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the Findings of ACL 2026 paper with citekey chen-shen-2026-value. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chen-shen-2026-value">
    <titleInfo>
      <title>VALUE ALIGNMENT TAX: Measuring Value Trade-offs in LLM Alignment</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Jiajun</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hua</namePart>
      <namePart type="family">Shen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>Existing work on value alignment typically characterizes value relations statically, ignoring how alignment interventions—such as prompting, fine-tuning, or preference optimization—reshape the broader value system. In practice, aligning a target value can implicitly shift other values, creating value trade-offs that remain largely unmeasured. We introduce the VAT, a framework that quantifies value trade-offs by measuring how alignment-induced changes propagate across interconnected values relative to achieved on-target gain. VAT captures the system-level dynamics of value expression under alignment intervention, enabling evaluation of both intended improvements and unintended side effects. Using a controlled scenario–action dataset grounded in Schwartz value theory, we collect paired pre–post normative judgments and analyze alignment effects across models, values, and interventions. Results show that alignment often produces uneven and structured co-movement among values, revealing systematic trade-offs between target and non-target values. These effects are largely invisible under conventional target-only evaluation, but become evident via VAT, highlighting process-level alignment risks and offering new insights into the dynamic nature of value alignment in LLMs. Dataset and code are open-sourced.</abstract>
    <identifier type="citekey">chen-shen-2026-value</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.1749/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>35046</start>
        <end>35069</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T VALUE ALIGNMENT TAX: Measuring Value Trade-offs in LLM Alignment
%A Chen, Jiajun
%A Shen, Hua
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F chen-shen-2026-value
%X Existing work on value alignment typically characterizes value relations statically, ignoring how alignment interventions—such as prompting, fine-tuning, or preference optimization—reshape the broader value system. In practice, aligning a target value can implicitly shift other values, creating value trade-offs that remain largely unmeasured. We introduce the VAT, a framework that quantifies value trade-offs by measuring how alignment-induced changes propagate across interconnected values relative to achieved on-target gain. VAT captures the system-level dynamics of value expression under alignment intervention, enabling evaluation of both intended improvements and unintended side effects. Using a controlled scenario–action dataset grounded in Schwartz value theory, we collect paired pre–post normative judgments and analyze alignment effects across models, values, and interventions. Results show that alignment often produces uneven and structured co-movement among values, revealing systematic trade-offs between target and non-target values. These effects are largely invisible under conventional target-only evaluation, but become evident via VAT, highlighting process-level alignment risks and offering new insights into the dynamic nature of value alignment in LLMs. Dataset and code are open-sourced.
%U https://aclanthology.org/2026.findings-acl.1749/
%P 35046-35069
Markdown (Informal)
[VALUE ALIGNMENT TAX: Measuring Value Trade-offs in LLM Alignment](https://aclanthology.org/2026.findings-acl.1749/) (Chen & Shen, Findings 2026)
ACL