@inproceedings{sunny-etal-2025-stories,
title = "From Stories to Statistics: Methodological Biases in {LLM}-Based Narrative Flow Quantification",
author = "Sunny, Amal and
Gupta, Advay and
Chandak, Yashashree and
Sreekumar, Vishnu",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.14/",
doi = "10.18653/v1/2025.conll-1.14",
pages = "201--215",
ISBN = "979-8-89176-271-8",
abstract = "Large Language Models (LLMs) have made significant contributions to cognitive science research. One area of application is narrative understanding. Sap et al. (2022) introduced $\textit{sequentiality}$, an LLM-derived measure that assesses the coherence of a story based on word probability distributions. They reported that recalled stories flowed less sequentially than imagined stories. However, the robustness and generalizability of this narrative flow measure remain unverified. To assess generalizability, we apply $\textit{sequentiality}$ derived from three different LLMs to a new dataset of matched autobiographical and biographical paragraphs. Contrary to previous results, we fail to find a significant difference in narrative flow between autobiographies and biographies. Further investigation reveals biases in the original data collection process, where topic selection systematically influences sequentiality scores. Adjusting for these biases substantially reduces the originally reported effect size. A validation exercise using LLM-generated stories with ``good'' and ``poor'' flow further highlights the flaws in the original formulation of sequentiality. Our findings suggest that LLM-based narrative flow quantification is susceptible to methodological artifacts. Finally, we provide some suggestions for modifying the $\textit{sequentiality}$ formula to accurately capture narrative flow."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sunny-etal-2025-stories">
<titleInfo>
<title>From Stories to Statistics: Methodological Biases in LLM-Based Narrative Flow Quantification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amal</namePart>
<namePart type="family">Sunny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Advay</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yashashree</namePart>
<namePart type="family">Chandak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishnu</namePart>
<namePart type="family">Sreekumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-271-8</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) have made significant contributions to cognitive science research. One area of application is narrative understanding. Sap et al. (2022) introduced sequentiality, an LLM-derived measure that assesses the coherence of a story based on word probability distributions. They reported that recalled stories flowed less sequentially than imagined stories. However, the robustness and generalizability of this narrative flow measure remain unverified. To assess generalizability, we apply sequentiality derived from three different LLMs to a new dataset of matched autobiographical and biographical paragraphs. Contrary to previous results, we fail to find a significant difference in narrative flow between autobiographies and biographies. Further investigation reveals biases in the original data collection process, where topic selection systematically influences sequentiality scores. Adjusting for these biases substantially reduces the originally reported effect size. A validation exercise using LLM-generated stories with “good” and “poor” flow further highlights the flaws in the original formulation of sequentiality. Our findings suggest that LLM-based narrative flow quantification is susceptible to methodological artifacts. Finally, we provide some suggestions for modifying the sequentiality formula to accurately capture narrative flow.</abstract>
<identifier type="citekey">sunny-etal-2025-stories</identifier>
<identifier type="doi">10.18653/v1/2025.conll-1.14</identifier>
<location>
<url>https://aclanthology.org/2025.conll-1.14/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>201</start>
<end>215</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T From Stories to Statistics: Methodological Biases in LLM-Based Narrative Flow Quantification
%A Sunny, Amal
%A Gupta, Advay
%A Chandak, Yashashree
%A Sreekumar, Vishnu
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F sunny-etal-2025-stories
%X Large Language Models (LLMs) have made significant contributions to cognitive science research. One area of application is narrative understanding. Sap et al. (2022) introduced sequentiality, an LLM-derived measure that assesses the coherence of a story based on word probability distributions. They reported that recalled stories flowed less sequentially than imagined stories. However, the robustness and generalizability of this narrative flow measure remain unverified. To assess generalizability, we apply sequentiality derived from three different LLMs to a new dataset of matched autobiographical and biographical paragraphs. Contrary to previous results, we fail to find a significant difference in narrative flow between autobiographies and biographies. Further investigation reveals biases in the original data collection process, where topic selection systematically influences sequentiality scores. Adjusting for these biases substantially reduces the originally reported effect size. A validation exercise using LLM-generated stories with “good” and “poor” flow further highlights the flaws in the original formulation of sequentiality. Our findings suggest that LLM-based narrative flow quantification is susceptible to methodological artifacts. Finally, we provide some suggestions for modifying the sequentiality formula to accurately capture narrative flow.
%R 10.18653/v1/2025.conll-1.14
%U https://aclanthology.org/2025.conll-1.14/
%U https://doi.org/10.18653/v1/2025.conll-1.14
%P 201-215

Markdown (Informal)
[From Stories to Statistics: Methodological Biases in LLM-Based Narrative Flow Quantification](https://aclanthology.org/2025.conll-1.14/) (Sunny et al., CoNLL 2025)

ACL
Amal Sunny, Advay Gupta, Yashashree Chandak, and Vishnu Sreekumar. 2025. [From Stories to Statistics: Methodological Biases in LLM-Based Narrative Flow Quantification](https://aclanthology.org/2025.conll-1.14/). In Proceedings of the 29th Conference on Computational Natural Language Learning, pages 201–215, Vienna, Austria. Association for Computational Linguistics.