@inproceedings{jones-bergen-2024-gpt,
title = "Does {GPT}-4 pass the {T}uring test?",
author = "Jones, Cameron and
Bergen, Ben",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.290",
doi = "10.18653/v1/2024.naacl-long.290",
pages = "5183--5210",
abstract = "We evaluated GPT-4 in a public online Turing test. The best-performing GPT-4 prompt passed in 49.7{\%} of games, outperforming ELIZA (22{\%}) and GPT-3.5 (20{\%}), but falling short of the baseline set by human participants (66{\%}). Participants{'} decisions were based mainly on linguistic style (35{\%}) and socioemotional traits (27{\%}), supporting the idea that intelligence, narrowly conceived, is not sufficient to pass the Turing test. Participant knowledge about LLMs and number of games played positively correlated with accuracy in detecting AI, suggesting learning and practice as possible strategies to mitigate deception. Despite known limitations as a test of intelligence, we argue that the Turing test continues to be relevant as an assessment of naturalistic communication and deception. AI models with the ability to masquerade as humans could have widespread societal consequences, and we analyse the effectiveness of different strategies and criteria for judging humanlikeness.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jones-bergen-2024-gpt">
<titleInfo>
<title>Does GPT-4 pass the Turing test?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cameron</namePart>
<namePart type="family">Jones</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Bergen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We evaluated GPT-4 in a public online Turing test. The best-performing GPT-4 prompt passed in 49.7% of games, outperforming ELIZA (22%) and GPT-3.5 (20%), but falling short of the baseline set by human participants (66%). Participants’ decisions were based mainly on linguistic style (35%) and socioemotional traits (27%), supporting the idea that intelligence, narrowly conceived, is not sufficient to pass the Turing test. Participant knowledge about LLMs and number of games played positively correlated with accuracy in detecting AI, suggesting learning and practice as possible strategies to mitigate deception. Despite known limitations as a test of intelligence, we argue that the Turing test continues to be relevant as an assessment of naturalistic communication and deception. AI models with the ability to masquerade as humans could have widespread societal consequences, and we analyse the effectiveness of different strategies and criteria for judging humanlikeness.</abstract>
<identifier type="citekey">jones-bergen-2024-gpt</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-long.290</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.290</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>5183</start>
<end>5210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Does GPT-4 pass the Turing test?
%A Jones, Cameron
%A Bergen, Ben
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F jones-bergen-2024-gpt
%X We evaluated GPT-4 in a public online Turing test. The best-performing GPT-4 prompt passed in 49.7% of games, outperforming ELIZA (22%) and GPT-3.5 (20%), but falling short of the baseline set by human participants (66%). Participants’ decisions were based mainly on linguistic style (35%) and socioemotional traits (27%), supporting the idea that intelligence, narrowly conceived, is not sufficient to pass the Turing test. Participant knowledge about LLMs and number of games played positively correlated with accuracy in detecting AI, suggesting learning and practice as possible strategies to mitigate deception. Despite known limitations as a test of intelligence, we argue that the Turing test continues to be relevant as an assessment of naturalistic communication and deception. AI models with the ability to masquerade as humans could have widespread societal consequences, and we analyse the effectiveness of different strategies and criteria for judging humanlikeness.
%R 10.18653/v1/2024.naacl-long.290
%U https://aclanthology.org/2024.naacl-long.290
%U https://doi.org/10.18653/v1/2024.naacl-long.290
%P 5183-5210
Markdown (Informal)
[Does GPT-4 pass the Turing test?](https://aclanthology.org/2024.naacl-long.290) (Jones & Bergen, NAACL 2024)
ACL
- Cameron Jones and Ben Bergen. 2024. Does GPT-4 pass the Turing test?. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 5183–5210, Mexico City, Mexico. Association for Computational Linguistics.