% BibTeX record for the LM4DH 2025 workshop paper (ACL Anthology ID 2025.lm4dh-1.4).
% NOTE(review): "Workshop" was restored in booktitle; the exported record read
% "Proceedings of the First on ..." -- confirm against the proceedings front matter.
@inproceedings{bagdasarov-alves-2025-like,
    title = "Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts",
    author = "Bagdasarov, Sergei and
      Alves, Diego",
    editor = "Arachchige, Isuri Nanomi and
      Frontini, Francesca and
      Mitkov, Ruslan and
      Rayson, Paul",
    booktitle = "Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities",
    month = sep,
    year = "2025",
    address = "Varna, Bulgaria",
    publisher = "INCOMA Ltd., Shoumen, Bulgaria",
    url = "https://aclanthology.org/2025.lm4dh-1.4/",
    pages = "38--47",
    abstract = "The purpose of this study is to analyze lexical and syntactic features in human-written texts and machine-generated texts produced by three state-of-the-art large language models: GPT-4o, Llama 3.1 and Qwen 2.5. We use Kullback-Leibler divergence to quantify the dissimilarity between humans and LLMs as well as to identify relevant features for comparison. We test the predictive power of our features using binary and multi-label random forest classifiers. The classifiers achieve robust performance of above 80{\%} for multi-label classification and above 90{\%} for binary classification. Our results point to substantial differences between human- and machine-generated texts. Human writers show higher variability in the use of syntactic resources, while LLMs score higher in lexical variability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for a single conference paper; the citekey identifier below matches the mods ID. -->
  <mods ID="bagdasarov-alves-2025-like">
    <titleInfo>
      <title>Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Sergei</namePart>
      <namePart type="family">Bagdasarov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Diego</namePart>
      <namePart type="family">Alves</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <!-- NOTE(review): "Workshop" restored; the exported title read "Proceedings of the First on ..." - confirm against the front matter. -->
        <title>Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Isuri</namePart>
        <namePart type="given">Nanomi</namePart>
        <namePart type="family">Arachchige</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Francesca</namePart>
        <namePart type="family">Frontini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paul</namePart>
        <namePart type="family">Rayson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The purpose of this study is to analyze lexical and syntactic features in human-written texts and machine-generated texts produced by three state-of-the-art large language models: GPT-4o, Llama 3.1 and Qwen 2.5. We use Kullback-Leibler divergence to quantify the dissimilarity between humans and LLMs as well as to identify relevant features for comparison. We test the predictive power of our features using binary and multi-label random forest classifiers. The classifiers achieve robust performance of above 80% for multi-label classification and above 90% for binary classification. Our results point to substantial differences between human- and machine-generated texts. Human writers show higher variability in the use of syntactic resources, while LLMs score higher in lexical variability.</abstract>
    <identifier type="citekey">bagdasarov-alves-2025-like</identifier>
    <location>
      <url>https://aclanthology.org/2025.lm4dh-1.4/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>38</start>
        <end>47</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts
%A Bagdasarov, Sergei
%A Alves, Diego
%Y Arachchige, Isuri Nanomi
%Y Frontini, Francesca
%Y Mitkov, Ruslan
%Y Rayson, Paul
%S Proceedings of the First Workshop on Natural Language Processing and Language Models for Digital Humanities
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F bagdasarov-alves-2025-like
%X The purpose of this study is to analyze lexical and syntactic features in human-written texts and machine-generated texts produced by three state-of-the-art large language models: GPT-4o, Llama 3.1 and Qwen 2.5. We use Kullback-Leibler divergence to quantify the dissimilarity between humans and LLMs as well as to identify relevant features for comparison. We test the predictive power of our features using binary and multi-label random forest classifiers. The classifiers achieve robust performance of above 80% for multi-label classification and above 90% for binary classification. Our results point to substantial differences between human- and machine-generated texts. Human writers show higher variability in the use of syntactic resources, while LLMs score higher in lexical variability.
%U https://aclanthology.org/2025.lm4dh-1.4/
%P 38-47
Markdown (Informal)
[Like a Human? A Linguistic Analysis of Human-written and Machine-generated Scientific Texts](https://aclanthology.org/2025.lm4dh-1.4/) (Bagdasarov & Alves, LM4DH 2025)
ACL