@inproceedings{shao-etal-2024-natural,
title = "A Natural Approach for Synthetic Short-Form Text Analysis",
author = "Shao, Ruiting and
Schwarz, Ryan and
Clifton, Christopher and
Delp, Edward",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.92",
pages = "1034--1042",
abstract = "Detecting synthetically generated text in the wild has become increasingly difficult with advances in Natural Language Generation techniques and the proliferation of freely available Large Language Models (LLMs). Social media and news sites can be flooded with synthetically generated misinformation via tweets and posts while authentic users can inadvertently spread this text via shares and retweets. Most modern natural language processing techniques designed to detect synthetically generated text focus primarily on long-form content, such as news articles, or incorporate stylometric characteristics and metadata during their analysis. Unfortunately, for short form text like tweets, this information is often unavailable, usually detached from its original source, displayed out of context, and is often too short or informal to yield significant information from stylometry. This paper proposes a method of detecting synthetically generated tweets via a Transformer architecture and incorporating unique style-based features. Additionally, we have created a new dataset consisting of human-generated and Large Language Model generated tweets for 4 topics and another dataset consisting of tweets paraphrased by 3 different paraphrase models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shao-etal-2024-natural">
<titleInfo>
<title>A Natural Approach for Synthetic Short-Form Text Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruiting</namePart>
<namePart type="family">Shao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Schwarz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Clifton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Delp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Detecting synthetically generated text in the wild has become increasingly difficult with advances in Natural Language Generation techniques and the proliferation of freely available Large Language Models (LLMs). Social media and news sites can be flooded with synthetically generated misinformation via tweets and posts while authentic users can inadvertently spread this text via shares and retweets. Most modern natural language processing techniques designed to detect synthetically generated text focus primarily on long-form content, such as news articles, or incorporate stylometric characteristics and metadata during their analysis. Unfortunately, for short form text like tweets, this information is often unavailable, usually detached from its original source, displayed out of context, and is often too short or informal to yield significant information from stylometry. This paper proposes a method of detecting synthetically generated tweets via a Transformer architecture and incorporating unique style-based features. Additionally, we have created a new dataset consisting of human-generated and Large Language Model generated tweets for 4 topics and another dataset consisting of tweets paraphrased by 3 different paraphrase models.</abstract>
<identifier type="citekey">shao-etal-2024-natural</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.92</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>1034</start>
<end>1042</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Natural Approach for Synthetic Short-Form Text Analysis
%A Shao, Ruiting
%A Schwarz, Ryan
%A Clifton, Christopher
%A Delp, Edward
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F shao-etal-2024-natural
%X Detecting synthetically generated text in the wild has become increasingly difficult with advances in Natural Language Generation techniques and the proliferation of freely available Large Language Models (LLMs). Social media and news sites can be flooded with synthetically generated misinformation via tweets and posts while authentic users can inadvertently spread this text via shares and retweets. Most modern natural language processing techniques designed to detect synthetically generated text focus primarily on long-form content, such as news articles, or incorporate stylometric characteristics and metadata during their analysis. Unfortunately, for short form text like tweets, this information is often unavailable, usually detached from its original source, displayed out of context, and is often too short or informal to yield significant information from stylometry. This paper proposes a method of detecting synthetically generated tweets via a Transformer architecture and incorporating unique style-based features. Additionally, we have created a new dataset consisting of human-generated and Large Language Model generated tweets for 4 topics and another dataset consisting of tweets paraphrased by 3 different paraphrase models.
%U https://aclanthology.org/2024.lrec-main.92
%P 1034-1042
Markdown (Informal)
[A Natural Approach for Synthetic Short-Form Text Analysis](https://aclanthology.org/2024.lrec-main.92) (Shao et al., LREC-COLING 2024)
ACL
- Ruiting Shao, Ryan Schwarz, Christopher Clifton, and Edward Delp. 2024. A Natural Approach for Synthetic Short-Form Text Analysis. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 1034–1042, Torino, Italia. ELRA and ICCL.