% BibTeX export of ACL Anthology record 2025.ranlp-1.123 (RANLP 2025 proceedings paper).
% Acronyms that must keep their case under sentence-casing styles are brace-protected.
@inproceedings{sat-etal-2025-modelling,
  title     = {Modelling the Relative Contributions of Stylistic Features in Forensic Authorship Attribution},
  author    = {Sat, G. {\c{C}}a{\u{g}}atay and
               Blake, John and
               Pyshkin, Evgeny},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.123/},
  pages     = {1066--1073},
  abstract  = {This paper explores the extent to which stylistic features contribute to the task of authorship attribution in forensic contexts. Drawing on a filtered subset of the Enron email corpus, the study operationalizes stylistic indicators across four groups: lexical, syntactic, orthographic, and discoursal. Using R Programming Language for feature engineering and logistic regression modelling, we systematically assessed both the individual and interactive effects of these features on attribution accuracy. Results show that n-gram similarity consistently outperformed all other features, with the combined model of n-gram similarity and its interaction with other features achieving accuracy, precision and F1 scores of 91.6{\%}, 93.3{\%} and 91.7{\%} respectively. The model was subsequently evaluated on a subset of the TEL corpus to assess its applicability in a forensic setting. The findings highlight the dominant role of lexical similarity and suggest that integrating interaction effects can yield further performance gains in forensic authorship analysis.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey sat-etal-2025-modelling.
       Layout: paper title, the three authors, issue date, then the host
       proceedings (title, four editors, publisher and place) as a relatedItem,
       followed by abstract, citekey, URL and page extent. -->
  <mods ID="sat-etal-2025-modelling">
    <titleInfo>
      <title>Modelling the Relative Contributions of Stylistic Features in Forensic Authorship Attribution</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">G</namePart>
      <namePart type="given">Çağatay</namePart>
      <namePart type="family">Sat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">John</namePart>
      <namePart type="family">Blake</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Evgeny</namePart>
      <namePart type="family">Pyshkin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <!-- Volume editors, in publication order -->
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper explores the extent to which stylistic features contribute to the task of authorship attribution in forensic contexts. Drawing on a filtered subset of the Enron email corpus, the study operationalizes stylistic indicators across four groups: lexical, syntactic, orthographic, and discoursal. Using R Programming Language for feature engineering and logistic regression modelling, we systematically assessed both the individual and interactive effects of these features on attribution accuracy. Results show that n-gram similarity consistently outperformed all other features, with the combined model of n-gram similarity and its interaction with other features achieving accuracy, precision and F1 scores of 91.6%, 93.3% and 91.7% respectively. The model was subsequently evaluated on a subset of the TEL corpus to assess its applicability in a forensic setting. The findings highlight the dominant role of lexical similarity and suggest that integrating interaction effects can yield further performance gains in forensic authorship analysis.</abstract>
    <identifier type="citekey">sat-etal-2025-modelling</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.123/</url>
    </location>
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>1066</start>
        <end>1073</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Modelling the Relative Contributions of Stylistic Features in Forensic Authorship Attribution
%A Sat, G. Çağatay
%A Blake, John
%A Pyshkin, Evgeny
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F sat-etal-2025-modelling
%X This paper explores the extent to which stylistic features contribute to the task of authorship attribution in forensic contexts. Drawing on a filtered subset of the Enron email corpus, the study operationalizes stylistic indicators across four groups: lexical, syntactic, orthographic, and discoursal. Using R Programming Language for feature engineering and logistic regression modelling, we systematically assessed both the individual and interactive effects of these features on attribution accuracy. Results show that n-gram similarity consistently outperformed all other features, with the combined model of n-gram similarity and its interaction with other features achieving accuracy, precision and F1 scores of 91.6%, 93.3% and 91.7% respectively. The model was subsequently evaluated on a subset of the TEL corpus to assess its applicability in a forensic setting. The findings highlight the dominant role of lexical similarity and suggest that integrating interaction effects can yield further performance gains in forensic authorship analysis.
%U https://aclanthology.org/2025.ranlp-1.123/
%P 1066-1073
Markdown (Informal)
[Modelling the Relative Contributions of Stylistic Features in Forensic Authorship Attribution](https://aclanthology.org/2025.ranlp-1.123/) (Sat et al., RANLP 2025)
ACL