% BibTeX record for a conference paper in the ACL 2026 proceedings (Volume 1: Long Papers).
% Acronyms and proper nouns are brace-protected as whole words so styles that recase keep them intact.
@inproceedings{tewari-etal-2026-trust,
  title     = {From Trust to Compromise: Outcome-Verified {LLM} Phishing Simulation and Real-Time Defense},
  author    = {Tewari, Tulika and
               Arachchilage, Nalin Asanka Gamagedara and
               Challa, Jagat Sesh and
               Kumar, Dhruv},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-long.543/},
  doi       = {10.18653/v1/2026.acl-long.543},
  pages     = {11831--11845},
  isbn      = {979-8-89176-390-6},
  abstract  = {Large Language Models (LLMs) excel as conversational agents. However, these capabilities can be weaponized to automate social engineering attacks that gradually build rapport to compromise the online safety of users. To understand this, researchers have simulated LLM-based attacks in controlled settings. However, the existing simulators focus on just Personal Identifiable Information (PII) requests within the chat. Thus, to represent a complete attack scenario, we introduce PhishSim, an outcome-driven LLM-based phishing simulator that verifies compromise by simulating a victim completing an external action step, such as submitting credentials on a malicious platform. This enables the generation of diverse, multi-turn attack trajectories. Building on these trajectories, we position PhishGate as a practical mitigation baseline for outcome-grounded conversational phishing: a real-time multi-agent risk scorer that detects manipulation tactics and estimates the severity of ongoing chats. For ambiguous cases, it invokes RAG-supported consistency checks. Evaluating four state-of-the-art LLM backends in a real-time setting, we find that PhishGate improves dialogue-level detection over a real-time baseline. Our results highlight both the promise and brittleness of LLM-based real-time phishing defense, providing an outcome-grounded testbed for studying conversational compromise.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="tewari-etal-2026-trust">
    <titleInfo>
      <title>From Trust to Compromise: Outcome-Verified LLM Phishing Simulation and Real-Time Defense</title>
    </titleInfo>
    <!-- Personal names carrying the "author" role. -->
    <name type="personal">
      <namePart type="given">Tulika</namePart>
      <namePart type="family">Tewari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nalin</namePart>
      <namePart type="given">Asanka</namePart>
      <namePart type="given">Gamagedara</namePart>
      <namePart type="family">Arachchilage</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jagat</namePart>
      <namePart type="given">Sesh</namePart>
      <namePart type="family">Challa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dhruv</namePart>
      <namePart type="family">Kumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-390-6</identifier>
    </relatedItem>
    <abstract>Large Language Models (LLMs) excel as conversational agents. However, these capabilities can be weaponized to automate social engineering attacks that gradually build rapport to compromise the online safety of users. To understand this, researchers have simulated LLM-based attacks in controlled settings. However, the existing simulators focus on just Personal Identifiable Information (PII) requests within the chat. Thus, to represent a complete attack scenario, we introduce PhishSim, an outcome-driven LLM-based phishing simulator that verifies compromise by simulating a victim completing an external action step, such as submitting credentials on a malicious platform. This enables the generation of diverse, multi-turn attack trajectories. Building on these trajectories, we position PhishGate as a practical mitigation baseline for outcome-grounded conversational phishing: a real-time multi-agent risk scorer that detects manipulation tactics and estimates the severity of ongoing chats. For ambiguous cases, it invokes RAG-supported consistency checks. Evaluating four state-of-the-art LLM backends in a real-time setting, we find that PhishGate improves dialogue-level detection over a real-time baseline. Our results highlight both the promise and brittleness of LLM-based real-time phishing defense, providing an outcome-grounded testbed for studying conversational compromise.</abstract>
    <!-- Identifiers and landing-page URL for the paper itself. -->
    <identifier type="citekey">tewari-etal-2026-trust</identifier>
    <identifier type="doi">10.18653/v1/2026.acl-long.543</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-long.543/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>11831</start>
        <end>11845</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T From Trust to Compromise: Outcome-Verified LLM Phishing Simulation and Real-Time Defense
%A Tewari, Tulika
%A Arachchilage, Nalin Asanka Gamagedara
%A Challa, Jagat Sesh
%A Kumar, Dhruv
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F tewari-etal-2026-trust
%X Large Language Models (LLMs) excel as conversational agents. However, these capabilities can be weaponized to automate social engineering attacks that gradually build rapport to compromise the online safety of users. To understand this, researchers have simulated LLM-based attacks in controlled settings. However, the existing simulators focus on just Personal Identifiable Information (PII) requests within the chat. Thus, to represent a complete attack scenario, we introduce PhishSim, an outcome-driven LLM-based phishing simulator that verifies compromise by simulating a victim completing an external action step, such as submitting credentials on a malicious platform. This enables the generation of diverse, multi-turn attack trajectories. Building on these trajectories, we position PhishGate as a practical mitigation baseline for outcome-grounded conversational phishing: a real-time multi-agent risk scorer that detects manipulation tactics and estimates the severity of ongoing chats. For ambiguous cases, it invokes RAG-supported consistency checks. Evaluating four state-of-the-art LLM backends in a real-time setting, we find that PhishGate improves dialogue-level detection over a real-time baseline. Our results highlight both the promise and brittleness of LLM-based real-time phishing defense, providing an outcome-grounded testbed for studying conversational compromise.
%R 10.18653/v1/2026.acl-long.543
%U https://aclanthology.org/2026.acl-long.543/
%U https://doi.org/10.18653/v1/2026.acl-long.543
%P 11831-11845
Markdown (Informal)
[From Trust to Compromise: Outcome-Verified LLM Phishing Simulation and Real-Time Defense](https://aclanthology.org/2026.acl-long.543/) (Tewari et al., ACL 2026)
ACL