@inproceedings{cahyawijaya-etal-2025-high,
title = "High-Dimension Human Value Representation in Large Language Models",
author = "Cahyawijaya, Samuel and
Chen, Delong and
Bang, Yejin and
Khalatbari, Leila and
Wilie, Bryan and
Ji, Ziwei and
Ishii, Etsuko and
Fung, Pascale",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-long.274/",
doi = "10.18653/v1/2025.naacl-long.274",
pages = "5303--5330",
ISBN = "979-8-89176-189-6",
abstract = "The widespread application of Large Language Models (LLMs) across various tasks and fields has necessitated the alignment of these models with human values and preferences. Given various approaches of human value alignment, such as Reinforcement Learning with Human Feedback (RLHF), constitutional learning, and safety fine-tuning etc., there is an urgent need to understand the scope and nature of human values injected into these LLMs before their deployment and adoption. We propose UniVar, a high-dimensional neural representation of symbolic human value distributions in LLMs, orthogonal to model architecture and training data. This is a continuous and scalable representation, self-supervised from the value-relevant output of 8 LLMs and evaluated on 15 open-source and commercial LLMs. Through UniVar, we visualize and explore how LLMs prioritize different values in 25 languages and cultures, shedding light on the complex interplay between human values and language modeling."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cahyawijaya-etal-2025-high">
  <titleInfo>
    <title>High-Dimension Human Value Representation in Large Language Models</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Samuel</namePart>
    <namePart type="family">Cahyawijaya</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Delong</namePart>
    <namePart type="family">Chen</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yejin</namePart>
    <namePart type="family">Bang</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Leila</namePart>
    <namePart type="family">Khalatbari</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Bryan</namePart>
    <namePart type="family">Wilie</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Ziwei</namePart>
    <namePart type="family">Ji</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Etsuko</namePart>
    <namePart type="family">Ishii</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Pascale</namePart>
    <namePart type="family">Fung</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2025-04</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Luis</namePart>
      <namePart type="family">Chiruzzo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alan</namePart>
      <namePart type="family">Ritter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lu</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
    <identifier type="isbn">979-8-89176-189-6</identifier>
  </relatedItem>
  <abstract>The widespread application of Large Language Models (LLMs) across various tasks and fields has necessitated the alignment of these models with human values and preferences. Given the various approaches to human value alignment, such as Reinforcement Learning from Human Feedback (RLHF), constitutional learning, and safety fine-tuning, there is an urgent need to understand the scope and nature of the human values injected into these LLMs before their deployment and adoption. We propose UniVar, a high-dimensional neural representation of symbolic human value distributions in LLMs, orthogonal to model architecture and training data. This is a continuous and scalable representation, self-supervised from the value-relevant output of 8 LLMs and evaluated on 15 open-source and commercial LLMs. Through UniVar, we visualize and explore how LLMs prioritize different values in 25 languages and cultures, shedding light on the complex interplay between human values and language modeling.</abstract>
<identifier type="citekey">cahyawijaya-etal-2025-high</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-long.274</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-long.274/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>5303</start>
<end>5330</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T High-Dimension Human Value Representation in Large Language Models
%A Cahyawijaya, Samuel
%A Chen, Delong
%A Bang, Yejin
%A Khalatbari, Leila
%A Wilie, Bryan
%A Ji, Ziwei
%A Ishii, Etsuko
%A Fung, Pascale
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-189-6
%F cahyawijaya-etal-2025-high
%X The widespread application of Large Language Models (LLMs) across various tasks and fields has necessitated the alignment of these models with human values and preferences. Given the various approaches to human value alignment, such as Reinforcement Learning from Human Feedback (RLHF), constitutional learning, and safety fine-tuning, there is an urgent need to understand the scope and nature of the human values injected into these LLMs before their deployment and adoption. We propose UniVar, a high-dimensional neural representation of symbolic human value distributions in LLMs, orthogonal to model architecture and training data. This is a continuous and scalable representation, self-supervised from the value-relevant output of 8 LLMs and evaluated on 15 open-source and commercial LLMs. Through UniVar, we visualize and explore how LLMs prioritize different values in 25 languages and cultures, shedding light on the complex interplay between human values and language modeling.
%R 10.18653/v1/2025.naacl-long.274
%U https://aclanthology.org/2025.naacl-long.274/
%U https://doi.org/10.18653/v1/2025.naacl-long.274
%P 5303-5330
Markdown (Informal)
[High-Dimension Human Value Representation in Large Language Models](https://aclanthology.org/2025.naacl-long.274/) (Cahyawijaya et al., NAACL 2025)
ACL
Samuel Cahyawijaya, Delong Chen, Yejin Bang, Leila Khalatbari, Bryan Wilie, Ziwei Ji, Etsuko Ishii, and Pascale Fung. 2025. High-Dimension Human Value Representation in Large Language Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 5303–5330, Albuquerque, New Mexico. Association for Computational Linguistics.