@inproceedings{yao-etal-2024-value,
title = "Value {FULCRA}: Mapping Large Language Models to the Multidimensional Spectrum of Basic Human Value",
author = "Yao, Jing and
Yi, Xiaoyuan and
Gong, Yifan and
Wang, Xiting and
Xie, Xing",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.486",
doi = "10.18653/v1/2024.naacl-long.486",
pages = "8762--8785",
abstract = "Value alignment is crucial for the responsible development of Large Language Models (LLMs). However, how to define values in this context remains largely unexplored. Existing work mainly specifies values as risk criteria formulated in the AI community, e.g., fairness and privacy protection, suffering from poor clarity, adaptability and transparency. Leveraging basic values established in humanity and social science that are compatible with values across cultures, this paper introduces a novel value space spanned by multiple basic value dimensions and proposes BaseAlign, a corresponding value alignment paradigm. Applying the representative Schwartz{'}s Theory of Basic Values as an instantiation, we construct FULCRA, a dataset consisting of 20k $($LLM output, value vector$)$ pairs. LLMs{'} outputs are mapped into the $K$-dim value space beyond simple binary labels, by identifying their underlying priorities for these value dimensions. Extensive analysis and experiments on FULCRA: (1) reveal the essential relation between basic values and LLMs{'} behaviors, (2) demonstrate that our paradigm with basic values not only covers existing risks but also anticipates the unidentified ones, and (3) manifest BaseAlign{'}s superiority in alignment performance with less data, paving the way for addressing the above three challenges.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yao-etal-2024-value">
<titleInfo>
<title>Value FULCRA: Mapping Large Language Models to the Multidimensional Spectrum of Basic Human Value</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyuan</namePart>
<namePart type="family">Yi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yifan</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiting</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Value alignment is crucial for the responsible development of Large Language Models (LLMs). However, how to define values in this context remains largely unexplored. Existing work mainly specifies values as risk criteria formulated in the AI community, e.g., fairness and privacy protection, suffering from poor clarity, adaptability and transparency. Leveraging basic values established in humanity and social science that are compatible with values across cultures, this paper introduces a novel value space spanned by multiple basic value dimensions and proposes BaseAlign, a corresponding value alignment paradigm. Applying the representative Schwartz’s Theory of Basic Values as an instantiation, we construct FULCRA, a dataset consisting of 20k (LLM output, value vector) pairs. LLMs’ outputs are mapped into the K-dim value space beyond simple binary labels, by identifying their underlying priorities for these value dimensions. Extensive analysis and experiments on FULCRA: (1) reveal the essential relation between basic values and LLMs’ behaviors, (2) demonstrate that our paradigm with basic values not only covers existing risks but also anticipates the unidentified ones, and (3) manifest BaseAlign’s superiority in alignment performance with less data, paving the way for addressing the above three challenges.</abstract>
<identifier type="citekey">yao-etal-2024-value</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-long.486</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.486</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>8762</start>
<end>8785</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Value FULCRA: Mapping Large Language Models to the Multidimensional Spectrum of Basic Human Value
%A Yao, Jing
%A Yi, Xiaoyuan
%A Gong, Yifan
%A Wang, Xiting
%A Xie, Xing
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F yao-etal-2024-value
%X Value alignment is crucial for the responsible development of Large Language Models (LLMs). However, how to define values in this context remains largely unexplored. Existing work mainly specifies values as risk criteria formulated in the AI community, e.g., fairness and privacy protection, suffering from poor clarity, adaptability and transparency. Leveraging basic values established in humanity and social science that are compatible with values across cultures, this paper introduces a novel value space spanned by multiple basic value dimensions and proposes BaseAlign, a corresponding value alignment paradigm. Applying the representative Schwartz’s Theory of Basic Values as an instantiation, we construct FULCRA, a dataset consisting of 20k (LLM output, value vector) pairs. LLMs’ outputs are mapped into the K-dim value space beyond simple binary labels, by identifying their underlying priorities for these value dimensions. Extensive analysis and experiments on FULCRA: (1) reveal the essential relation between basic values and LLMs’ behaviors, (2) demonstrate that our paradigm with basic values not only covers existing risks but also anticipates the unidentified ones, and (3) manifest BaseAlign’s superiority in alignment performance with less data, paving the way for addressing the above three challenges.
%R 10.18653/v1/2024.naacl-long.486
%U https://aclanthology.org/2024.naacl-long.486
%U https://doi.org/10.18653/v1/2024.naacl-long.486
%P 8762-8785
Markdown (Informal)
[Value FULCRA: Mapping Large Language Models to the Multidimensional Spectrum of Basic Human Value](https://aclanthology.org/2024.naacl-long.486) (Yao et al., NAACL 2024)
ACL