@inproceedings{gromada-etal-2025-evaluating,
title = "Evaluating Conversational Agents with Persona-driven User Simulations based on Large Language Models: A Sales Bot Case Study",
author = "Gromada, Justyna and
Kasicka, Alicja and
Komkowska, Ewa and
Krajewski, Lukasz and
Krawczyk, Natalia and
Veyret, Morgan and
Przyby{\l}, Bartosz and
Rojas-Barahona, Lina M. and
Szczerbak, Micha{\l} K.",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.16/",
pages = "230--245",
ISBN = "979-8-89176-333-3",
abstract = "We present a novel approach to conversational agent evaluation using Persona-driven User Simulations based on Large Language Models (LLMs). Our methodology first uses LLMs to generate diverse customer personas, which are then used to configure a single LLM-based user simulator. This simulator evaluates SalesBot 2.0, a proactive conversational sales agent. We introduce a dataset of these personas, along with corresponding goals and conversation scenarios, enabling comprehensive testing across different customer types with varying assertiveness levels and precision of needs. Our evaluation framework assesses both the simulator{'}s adherence to persona instructions and the bot{'}s performance across multiple dimensions, combining human annotation with LLM-as-a-judge assessments using commercial and open-source models. Results demonstrate that our LLM-based simulator effectively emulates nuanced customer roles, and that cross-selling strategies can be implemented with minimal impact on customer satisfaction, varying by customer type."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gromada-etal-2025-evaluating">
<titleInfo>
<title>Evaluating Conversational Agents with Persona-driven User Simulations based on Large Language Models: A Sales Bot Case Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Justyna</namePart>
<namePart type="family">Gromada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicja</namePart>
<namePart type="family">Kasicka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ewa</namePart>
<namePart type="family">Komkowska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lukasz</namePart>
<namePart type="family">Krajewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalia</namePart>
<namePart type="family">Krawczyk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Morgan</namePart>
<namePart type="family">Veyret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bartosz</namePart>
<namePart type="family">Przybył</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michał</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Szczerbak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>We present a novel approach to conversational agent evaluation using Persona-driven User Simulations based on Large Language Models (LLMs). Our methodology first uses LLMs to generate diverse customer personas, which are then used to configure a single LLM-based user simulator. This simulator evaluates SalesBot 2.0, a proactive conversational sales agent. We introduce a dataset of these personas, along with corresponding goals and conversation scenarios, enabling comprehensive testing across different customer types with varying assertiveness levels and precision of needs. Our evaluation framework assesses both the simulator’s adherence to persona instructions and the bot’s performance across multiple dimensions, combining human annotation with LLM-as-a-judge assessments using commercial and open-source models. Results demonstrate that our LLM-based simulator effectively emulates nuanced customer roles, and that cross-selling strategies can be implemented with minimal impact on customer satisfaction, varying by customer type.</abstract>
<identifier type="citekey">gromada-etal-2025-evaluating</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.16/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>230</start>
<end>245</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Conversational Agents with Persona-driven User Simulations based on Large Language Models: A Sales Bot Case Study
%A Gromada, Justyna
%A Kasicka, Alicja
%A Komkowska, Ewa
%A Krajewski, Lukasz
%A Krawczyk, Natalia
%A Veyret, Morgan
%A Przybył, Bartosz
%A Rojas-Barahona, Lina M.
%A Szczerbak, Michał K.
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F gromada-etal-2025-evaluating
%X We present a novel approach to conversational agent evaluation using Persona-driven User Simulations based on Large Language Models (LLMs). Our methodology first uses LLMs to generate diverse customer personas, which are then used to configure a single LLM-based user simulator. This simulator evaluates SalesBot 2.0, a proactive conversational sales agent. We introduce a dataset of these personas, along with corresponding goals and conversation scenarios, enabling comprehensive testing across different customer types with varying assertiveness levels and precision of needs. Our evaluation framework assesses both the simulator’s adherence to persona instructions and the bot’s performance across multiple dimensions, combining human annotation with LLM-as-a-judge assessments using commercial and open-source models. Results demonstrate that our LLM-based simulator effectively emulates nuanced customer roles, and that cross-selling strategies can be implemented with minimal impact on customer satisfaction, varying by customer type.
%U https://aclanthology.org/2025.emnlp-industry.16/
%P 230-245
Markdown (Informal)
[Evaluating Conversational Agents with Persona-driven User Simulations based on Large Language Models: A Sales Bot Case Study](https://aclanthology.org/2025.emnlp-industry.16/) (Gromada et al., EMNLP 2025)
ACL
- Justyna Gromada, Alicja Kasicka, Ewa Komkowska, Lukasz Krajewski, Natalia Krawczyk, Morgan Veyret, Bartosz Przybył, Lina M. Rojas-Barahona, and Michał K. Szczerbak. 2025. Evaluating Conversational Agents with Persona-driven User Simulations based on Large Language Models: A Sales Bot Case Study. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 230–245, Suzhou (China). Association for Computational Linguistics.