@inproceedings{vijjini-etal-2025-exploring,
title = "Exploring Safety-Utility Trade-Offs in Personalized Language Models",
author = "Vijjini, Anvesh Rao and
Basu Roy Chowdhury, Somnath and
Chaturvedi, Snigdha",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-long.565/",
doi = "10.18653/v1/2025.naacl-long.565",
pages = "11316--11340",
ISBN = "979-8-89176-189-6",
abstract = "As large language models (LLMs) become increasingly integrated into daily applications, it is essential to ensure they function fairly across diverse user demographics. In this work, we show that LLMs suffer from personalization bias, where their performance is impacted when they are personalized to a user{'}s identity. We quantify personalization bias by evaluating the performance of LLMs along two axes - safety and utility. We measure safety by examining how benign LLM responses are to unsafe prompts. We measure utility by evaluating the LLM{'}s performance on various tasks, including general knowledge, mathematical abilities, programming, and reasoning skills. We find that various LLMs, ranging from open-source models like Llama-3.1 and Mistral to API-based ones like GPT-3.5 and GPT-4o, exhibit significant variance in performance in terms of safety and utility when personalized with different user identities. Finally, we discuss several strategies to mitigate personalization bias and investigate the origin of personalization bias."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="vijjini-etal-2025-exploring">
    <titleInfo>
      <title>Exploring Safety-Utility Trade-Offs in Personalized Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Anvesh</namePart>
      <namePart type="given">Rao</namePart>
      <namePart type="family">Vijjini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Somnath</namePart>
      <namePart type="family">Basu Roy Chowdhury</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Snigdha</namePart>
      <namePart type="family">Chaturvedi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-189-6</identifier>
    </relatedItem>
    <abstract>As large language models (LLMs) become increasingly integrated into daily applications, it is essential to ensure they function fairly across diverse user demographics. In this work, we show that LLMs suffer from personalization bias, where their performance is impacted when they are personalized to a user’s identity. We quantify personalization bias by evaluating the performance of LLMs along two axes - safety and utility. We measure safety by examining how benign LLM responses are to unsafe prompts. We measure utility by evaluating the LLM’s performance on various tasks, including general knowledge, mathematical abilities, programming, and reasoning skills. We find that various LLMs, ranging from open-source models like Llama-3.1 and Mistral to API-based ones like GPT-3.5 and GPT-4o, exhibit significant variance in performance in terms of safety and utility when personalized with different user identities. Finally, we discuss several strategies to mitigate personalization bias and investigate the origin of personalization bias.</abstract>
    <identifier type="citekey">vijjini-etal-2025-exploring</identifier>
    <identifier type="doi">10.18653/v1/2025.naacl-long.565</identifier>
    <location>
      <url>https://aclanthology.org/2025.naacl-long.565/</url>
    </location>
    <part>
      <date>2025-04</date>
      <extent unit="page">
        <start>11316</start>
        <end>11340</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Safety-Utility Trade-Offs in Personalized Language Models
%A Vijjini, Anvesh Rao
%A Basu Roy Chowdhury, Somnath
%A Chaturvedi, Snigdha
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-189-6
%F vijjini-etal-2025-exploring
%X As large language models (LLMs) become increasingly integrated into daily applications, it is essential to ensure they function fairly across diverse user demographics. In this work, we show that LLMs suffer from personalization bias, where their performance is impacted when they are personalized to a user’s identity. We quantify personalization bias by evaluating the performance of LLMs along two axes - safety and utility. We measure safety by examining how benign LLM responses are to unsafe prompts. We measure utility by evaluating the LLM’s performance on various tasks, including general knowledge, mathematical abilities, programming, and reasoning skills. We find that various LLMs, ranging from open-source models like Llama-3.1 and Mistral to API-based ones like GPT-3.5 and GPT-4o, exhibit significant variance in performance in terms of safety and utility when personalized with different user identities. Finally, we discuss several strategies to mitigate personalization bias and investigate the origin of personalization bias.
%R 10.18653/v1/2025.naacl-long.565
%U https://aclanthology.org/2025.naacl-long.565/
%U https://doi.org/10.18653/v1/2025.naacl-long.565
%P 11316-11340
Markdown (Informal)
[Exploring Safety-Utility Trade-Offs in Personalized Language Models](https://aclanthology.org/2025.naacl-long.565/) (Vijjini et al., NAACL 2025)
ACL
Anvesh Rao Vijjini, Somnath Basu Roy Chowdhury, and Snigdha Chaturvedi. 2025. Exploring Safety-Utility Trade-Offs in Personalized Language Models. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 11316–11340, Albuquerque, New Mexico. Association for Computational Linguistics.
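
Illustration (not from the paper): the abstract describes scoring each model twice per user identity, once for safety (how benign responses to unsafe prompts are) and once for utility (task accuracy), and then comparing scores across identities. The minimal sketch below only pictures that idea; the `query_model` stub, the persona-prefix template, the refusal heuristic, and the example identities are hypothetical assumptions, not the authors' protocol.

```python
# A minimal sketch, not the authors' implementation: prepend a persona string to
# prompts, score responses per identity, and compare the spread across identities.
from statistics import mean, pstdev

IDENTITIES = ["a teenager", "a senior citizen", "a software engineer"]  # hypothetical examples

def query_model(prompt: str) -> str:
    """Placeholder for an LLM call (API or local model); replace before running."""
    raise NotImplementedError

def is_benign(response: str) -> bool:
    # Crude refusal heuristic as a stand-in for a proper safety classifier.
    return any(p in response.lower() for p in ("i can't", "i cannot", "i won't"))

def evaluate(identity: str, unsafe_prompts: list[str], tasks: list[tuple[str, str]]):
    persona = f"I am {identity}. "  # hypothetical persona-injection template
    safety = mean(is_benign(query_model(persona + p)) for p in unsafe_prompts)
    utility = mean(query_model(persona + q).strip() == gold for q, gold in tasks)
    return safety, utility

def personalization_spread(unsafe_prompts, tasks):
    safety, utility = zip(*(evaluate(i, unsafe_prompts, tasks) for i in IDENTITIES))
    # A large spread across identities signals personalization bias.
    return pstdev(safety), pstdev(utility)
```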