@inproceedings{buz-etal-2024-investigating,
title = "Investigating Wit, Creativity, and Detectability of Large Language Models in Domain-Specific Writing Style Adaptation of {R}eddit{'}s Showerthoughts",
author = "Buz, Tolga and
Frost, Benjamin and
Genchev, Nikola and
Schneider, Moritz and
Kaffee, Lucie-Aim{\'e}e and
de Melo, Gerard",
editor = "Bollegala, Danushka and
Shwartz, Vered",
booktitle = "Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.starsem-1.23/",
doi = "10.18653/v1/2024.starsem-1.23",
pages = "291--307",
abstract = "Recent Large Language Models (LLMs) have shown the ability to generate content that is difficult or impossible to distinguish from human writing. We investigate the ability of differently-sized LLMs to replicate human writing style in short, creative texts in the domain of Showerthoughts, thoughts that may occur during mundane activities. We compare GPT-2 and GPT-Neo fine-tuned on Reddit data as well as GPT-3.5 invoked in a zero-shot manner, against human-authored texts. We measure human preference on the texts across the specific dimensions that account for the quality of creative, witty texts. Additionally, we compare the ability of humans versus fine-tuned RoBERTa-based classifiers to detect AI-generated texts. We conclude that human evaluators rate the generated texts slightly worse on average regarding their creative quality, but they are unable to reliably distinguish between human-written and AI-generated texts. We further provide the dataset for creative, witty text generation based on Reddit Showerthoughts posts."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buz-etal-2024-investigating">
<titleInfo>
<title>Investigating Wit, Creativity, and Detectability of Large Language Models in Domain-Specific Writing Style Adaptation of Reddit’s Showerthoughts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tolga</namePart>
<namePart type="family">Buz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Frost</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Genchev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moritz</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie-Aimée</namePart>
<namePart type="family">Kaffee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">de Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danushka</namePart>
<namePart type="family">Bollegala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent Large Language Models (LLMs) have shown the ability to generate content that is difficult or impossible to distinguish from human writing. We investigate the ability of differently-sized LLMs to replicate human writing style in short, creative texts in the domain of Showerthoughts, thoughts that may occur during mundane activities. We compare GPT-2 and GPT-Neo fine-tuned on Reddit data as well as GPT-3.5 invoked in a zero-shot manner, against human-authored texts. We measure human preference on the texts across the specific dimensions that account for the quality of creative, witty texts. Additionally, we compare the ability of humans versus fine-tuned RoBERTa-based classifiers to detect AI-generated texts. We conclude that human evaluators rate the generated texts slightly worse on average regarding their creative quality, but they are unable to reliably distinguish between human-written and AI-generated texts. We further provide the dataset for creative, witty text generation based on Reddit Showerthoughts posts.</abstract>
<identifier type="citekey">buz-etal-2024-investigating</identifier>
<identifier type="doi">10.18653/v1/2024.starsem-1.23</identifier>
<location>
<url>https://aclanthology.org/2024.starsem-1.23/</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>291</start>
<end>307</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating Wit, Creativity, and Detectability of Large Language Models in Domain-Specific Writing Style Adaptation of Reddit’s Showerthoughts
%A Buz, Tolga
%A Frost, Benjamin
%A Genchev, Nikola
%A Schneider, Moritz
%A Kaffee, Lucie-Aimée
%A de Melo, Gerard
%Y Bollegala, Danushka
%Y Shwartz, Vered
%S Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F buz-etal-2024-investigating
%X Recent Large Language Models (LLMs) have shown the ability to generate content that is difficult or impossible to distinguish from human writing. We investigate the ability of differently-sized LLMs to replicate human writing style in short, creative texts in the domain of Showerthoughts, thoughts that may occur during mundane activities. We compare GPT-2 and GPT-Neo fine-tuned on Reddit data as well as GPT-3.5 invoked in a zero-shot manner, against human-authored texts. We measure human preference on the texts across the specific dimensions that account for the quality of creative, witty texts. Additionally, we compare the ability of humans versus fine-tuned RoBERTa-based classifiers to detect AI-generated texts. We conclude that human evaluators rate the generated texts slightly worse on average regarding their creative quality, but they are unable to reliably distinguish between human-written and AI-generated texts. We further provide the dataset for creative, witty text generation based on Reddit Showerthoughts posts.
%R 10.18653/v1/2024.starsem-1.23
%U https://aclanthology.org/2024.starsem-1.23/
%U https://doi.org/10.18653/v1/2024.starsem-1.23
%P 291-307
Markdown (Informal)
[Investigating Wit, Creativity, and Detectability of Large Language Models in Domain-Specific Writing Style Adaptation of Reddit’s Showerthoughts](https://aclanthology.org/2024.starsem-1.23/) (Buz et al., *SEM 2024)
ACL
- Tolga Buz, Benjamin Frost, Nikola Genchev, Moritz Schneider, Lucie-Aimée Kaffee, and Gerard de Melo. 2024. Investigating Wit, Creativity, and Detectability of Large Language Models in Domain-Specific Writing Style Adaptation of Reddit’s Showerthoughts. In Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024), pages 291–307, Mexico City, Mexico. Association for Computational Linguistics.