@inproceedings{zhang-etal-2025-get,
title = "Get Confused Cautiously: Textual Sequence Memorization Erasure with Selective Entropy Maximization",
author = "Zhang, Zhaohan and
Liu, Ziquan and
Patras, Ioannis",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.726/",
pages = "10924--10939",
abstract = "Large Language Models (LLMs) have been found to memorize and recite some of the textual sequences from their training set verbatim, raising broad concerns about privacy and copyright issues. This Textual Sequence Memorization (TSM) phenomenon leads to a high demand to regulate LLM output to prevent generating certain memorized text that a user wants to be forgotten. However, our empirical study reveals that existing methods for TSM erasure fail to unlearn large numbers of memorized samples without substantially jeopardizing the model utility. To achieve a better trade-off between the effectiveness of TSM erasure and model utility in LLMs, our paper proposes a new method, named Entropy Maximization with Selective Optimization (EMSO), where the model parameters are updated sparsely based on novel optimization and selection criteria, in a manner that does not require additional models or data other than that in the forget set. More specifically, we propose an entropy-based loss that is shown to lead to more stable optimization and better preserves model utility than existing methods. In addition, we propose a contrastive gradient metric that takes both the gradient magnitude and direction into consideration, so as to localize model parameters to update in a sparse model updating scheme. Extensive experiments across three model scales demonstrate that our method excels in handling large-scale forgetting requests while preserving model ability in language generation and understanding."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-get">
<titleInfo>
<title>Get Confused Cautiously: Textual Sequence Memorization Erasure with Selective Entropy Maximization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhaohan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ziquan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioannis</namePart>
<namePart type="family">Patras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) have been found to memorize and recite some of the textual sequences from their training set verbatim, raising broad concerns about privacy and copyright issues. This Textual Sequence Memorization (TSM) phenomenon leads to a high demand to regulate LLM output to prevent generating certain memorized text that a user wants to be forgotten. However, our empirical study reveals that existing methods for TSM erasure fail to unlearn large numbers of memorized samples without substantially jeopardizing the model utility. To achieve a better trade-off between the effectiveness of TSM erasure and model utility in LLMs, our paper proposes a new method, named Entropy Maximization with Selective Optimization (EMSO), where the model parameters are updated sparsely based on novel optimization and selection criteria, in a manner that does not require additional models or data other than that in the forget set. More specifically, we propose an entropy-based loss that is shown to lead to more stable optimization and better preserves model utility than existing methods. In addition, we propose a contrastive gradient metric that takes both the gradient magnitude and direction into consideration, so as to localize model parameters to update in a sparse model updating scheme. Extensive experiments across three model scales demonstrate that our method excels in handling large-scale forgetting requests while preserving model ability in language generation and understanding.</abstract>
<identifier type="citekey">zhang-etal-2025-get</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.726/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>10924</start>
<end>10939</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Get Confused Cautiously: Textual Sequence Memorization Erasure with Selective Entropy Maximization
%A Zhang, Zhaohan
%A Liu, Ziquan
%A Patras, Ioannis
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhang-etal-2025-get
%X Large Language Models (LLMs) have been found to memorize and recite some of the textual sequences from their training set verbatim, raising broad concerns about privacy and copyright issues. This Textual Sequence Memorization (TSM) phenomenon leads to a high demand to regulate LLM output to prevent generating certain memorized text that a user wants to be forgotten. However, our empirical study reveals that existing methods for TSM erasure fail to unlearn large numbers of memorized samples without substantially jeopardizing the model utility. To achieve a better trade-off between the effectiveness of TSM erasure and model utility in LLMs, our paper proposes a new method, named Entropy Maximization with Selective Optimization (EMSO), where the model parameters are updated sparsely based on novel optimization and selection criteria, in a manner that does not require additional models or data other than that in the forget set. More specifically, we propose an entropy-based loss that is shown to lead to more stable optimization and better preserves model utility than existing methods. In addition, we propose a contrastive gradient metric that takes both the gradient magnitude and direction into consideration, so as to localize model parameters to update in a sparse model updating scheme. Extensive experiments across three model scales demonstrate that our method excels in handling large-scale forgetting requests while preserving model ability in language generation and understanding.
%U https://aclanthology.org/2025.coling-main.726/
%P 10924-10939
Markdown (Informal)
[Get Confused Cautiously: Textual Sequence Memorization Erasure with Selective Entropy Maximization](https://aclanthology.org/2025.coling-main.726/) (Zhang et al., COLING 2025)
ACL