@article{more-etal-2025-towards,
title = "Towards More Realistic Extraction Attacks: An Adversarial Perspective",
author = "More, Yash and
Ganesh, Prakhar and
Farnadi, Golnoosh",
journal = "Transactions of the Association for Computational Linguistics",
volume = "13",
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.tacl-1.82/",
doi = "10.1162/tacl.a.62",
pages = "1832--1849",
abstract = "Language models are prone to memorizing their training data, making them vulnerable to extraction attacks. While existing research often examines isolated setups, such as a single model or a fixed prompt, real-world adversaries have a considerably larger attack surface due to access to models across various sizes and checkpoints, and repeated prompting. In this paper, we revisit extraction attacks from an adversarial perspective{---}with multi-faceted access to the underlying data. We find significant churn in extraction trends, i.e., even unintuitive changes to the prompt, or targeting smaller models and earlier checkpoints, can extract distinct information. By combining multiple attacks, our adversary doubles (2 {\texttimes}) the extraction risks, persisting even under mitigation strategies like data deduplication. We conclude with four case studies, including detecting pre-training data, copyright violations, extracting personally identifiable information, and attacking closed-source models, showing how our more realistic adversary can outperform existing adversaries in the literature."
}