@inproceedings{takayanagi-etal-2025-gpt,
title = "Can {GPT}-4 Sway Experts' Investment Decisions?",
author = "Takayanagi, Takehiro and
Takamura, Hiroya and
Izumi, Kiyoshi and
Chen, Chung-Chi",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.22/",
doi = "10.18653/v1/2025.findings-naacl.22",
pages = "374--383",
ISBN = "979-8-89176-195-7",
abstract = "In the post-Turing era, evaluating large language models (LLMs) involves assessing generated text based on readers' decisions rather than merely its indistinguishability from human-produced content. This paper explores how LLM-generated text impacts readers' decisions, focusing on both amateur and expert audiences. Our findings indicate that GPT-4 can generate persuasive analyses affecting the decisions of both amateurs and professionals. Furthermore, we evaluate the generated text from the aspects of grammar, convincingness, logical coherence, and usefulness. The results highlight a high correlation between real-world evaluation through audience decisions and the current multi-dimensional evaluators commonly used for generative models. Overall, this paper shows the potential and risk of using generated text to sway human decisions and also points out a new direction for evaluating generated text, i.e., leveraging the decisions of readers. We release our dataset to assist future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="takayanagi-etal-2025-gpt">
<titleInfo>
<title>Can GPT-4 Sway Experts’ Investment Decisions?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Takehiro</namePart>
<namePart type="family">Takayanagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiyoshi</namePart>
<namePart type="family">Izumi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>In the post-Turing era, evaluating large language models (LLMs) involves assessing generated text based on readers’ decisions rather than merely its indistinguishability from human-produced content. This paper explores how LLM-generated text impacts readers’ decisions, focusing on both amateur and expert audiences. Our findings indicate that GPT-4 can generate persuasive analyses affecting the decisions of both amateurs and professionals. Furthermore, we evaluate the generated text from the aspects of grammar, convincingness, logical coherence, and usefulness. The results highlight a high correlation between real-world evaluation through audience decisions and the current multi-dimensional evaluators commonly used for generative models. Overall, this paper shows the potential and risk of using generated text to sway human decisions and also points out a new direction for evaluating generated text, i.e., leveraging the decisions of readers. We release our dataset to assist future research.</abstract>
<identifier type="citekey">takayanagi-etal-2025-gpt</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.22</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.22/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>374</start>
<end>383</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can GPT-4 Sway Experts’ Investment Decisions?
%A Takayanagi, Takehiro
%A Takamura, Hiroya
%A Izumi, Kiyoshi
%A Chen, Chung-Chi
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F takayanagi-etal-2025-gpt
%X In the post-Turing era, evaluating large language models (LLMs) involves assessing generated text based on readers’ decisions rather than merely its indistinguishability from human-produced content. This paper explores how LLM-generated text impacts readers’ decisions, focusing on both amateur and expert audiences. Our findings indicate that GPT-4 can generate persuasive analyses affecting the decisions of both amateurs and professionals. Furthermore, we evaluate the generated text from the aspects of grammar, convincingness, logical coherence, and usefulness. The results highlight a high correlation between real-world evaluation through audience decisions and the current multi-dimensional evaluators commonly used for generative models. Overall, this paper shows the potential and risk of using generated text to sway human decisions and also points out a new direction for evaluating generated text, i.e., leveraging the decisions of readers. We release our dataset to assist future research.
%R 10.18653/v1/2025.findings-naacl.22
%U https://aclanthology.org/2025.findings-naacl.22/
%U https://doi.org/10.18653/v1/2025.findings-naacl.22
%P 374-383
Markdown (Informal)
[Can GPT-4 Sway Experts’ Investment Decisions?](https://aclanthology.org/2025.findings-naacl.22/) (Takayanagi et al., Findings 2025)
ACL
- Takehiro Takayanagi, Hiroya Takamura, Kiyoshi Izumi, and Chung-Chi Chen. 2025. Can GPT-4 Sway Experts’ Investment Decisions? In Findings of the Association for Computational Linguistics: NAACL 2025, pages 374–383, Albuquerque, New Mexico. Association for Computational Linguistics.