@inproceedings{de-langis-etal-2026-mary,
title = "{M}ary, the Cheeseburger-Eating Vegetarian: Do {LLM}s Recognize Incoherence in Narratives?",
author = {De Langis, Karin and
{\"O}ncel, P{\"u}ren and
Peters, Ryan and
Elfenbein, Andrew and
Allen, Laura Kristen and
Schramm, Andreas and
Kang, Dongyeop},
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.280/",
pages = "5958--5970",
ISBN = "979-8-89176-380-7",
abstract = "Leveraging a dataset of paired narratives, we investigate the extent to which large language models (LLMs) can reliably separate incoherent and coherent stories.A probing study finds that LLMs' internal representations can reliably identify incoherent events in narratives. However, this separation disappears by the narrative{'}s end, and weakens when the differences between coherent and incoherent stories are more subtle. When asked to rate overall coherence of narratives after reading, LLMs generate responses that fail to satisfactorily separate the coherent and incoherent narratives.Reasoning models tested do not eliminate these deficits, indicating that thought strings may not be able to fully address the discrepancy between model internal state and behavior.Additionally, we find that LLMs appear to be more sensitive to incoherence resulting from an event that violates the setting (e.g., a rainy day in the desert) than to incoherence arising from a character violating an established trait (e.g., Mary, a vegetarian, later orders a cheeseburger), suggesting that LLMs may rely more on prototypical world knowledge than building coherence through a meaning-based world model of the narrative setting. Together, our results indicate that LLMs lack robustness in their ability to recognize incoherence in narratives."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-langis-etal-2026-mary">
<titleInfo>
<title>Mary, the Cheeseburger-Eating Vegetarian: Do LLMs Recognize Incoherence in Narratives?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">De Langis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Püren</namePart>
<namePart type="family">Öncel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Peters</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Elfenbein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="given">Kristen</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Schramm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongyeop</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
    <abstract>Leveraging a dataset of paired narratives, we investigate the extent to which large language models (LLMs) can reliably separate incoherent and coherent stories. A probing study finds that LLMs’ internal representations can reliably identify incoherent events in narratives. However, this separation disappears by the narrative’s end, and weakens when the differences between coherent and incoherent stories are more subtle. When asked to rate the overall coherence of narratives after reading, LLMs generate responses that fail to satisfactorily separate the coherent and incoherent narratives. Reasoning models tested do not eliminate these deficits, indicating that thought strings may not be able to fully address the discrepancy between model internal state and behavior. Additionally, we find that LLMs appear to be more sensitive to incoherence resulting from an event that violates the setting (e.g., a rainy day in the desert) than to incoherence arising from a character violating an established trait (e.g., Mary, a vegetarian, later orders a cheeseburger), suggesting that LLMs may rely more on prototypical world knowledge than on building coherence through a meaning-based world model of the narrative setting. Together, our results indicate that LLMs lack robustness in their ability to recognize incoherence in narratives.</abstract>
<identifier type="citekey">de-langis-etal-2026-mary</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.280/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>5958</start>
<end>5970</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mary, the Cheeseburger-Eating Vegetarian: Do LLMs Recognize Incoherence in Narratives?
%A De Langis, Karin
%A Öncel, Püren
%A Peters, Ryan
%A Elfenbein, Andrew
%A Allen, Laura Kristen
%A Schramm, Andreas
%A Kang, Dongyeop
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F de-langis-etal-2026-mary
%X Leveraging a dataset of paired narratives, we investigate the extent to which large language models (LLMs) can reliably separate incoherent and coherent stories. A probing study finds that LLMs’ internal representations can reliably identify incoherent events in narratives. However, this separation disappears by the narrative’s end, and weakens when the differences between coherent and incoherent stories are more subtle. When asked to rate the overall coherence of narratives after reading, LLMs generate responses that fail to satisfactorily separate the coherent and incoherent narratives. Reasoning models tested do not eliminate these deficits, indicating that thought strings may not be able to fully address the discrepancy between model internal state and behavior. Additionally, we find that LLMs appear to be more sensitive to incoherence resulting from an event that violates the setting (e.g., a rainy day in the desert) than to incoherence arising from a character violating an established trait (e.g., Mary, a vegetarian, later orders a cheeseburger), suggesting that LLMs may rely more on prototypical world knowledge than on building coherence through a meaning-based world model of the narrative setting. Together, our results indicate that LLMs lack robustness in their ability to recognize incoherence in narratives.
%U https://aclanthology.org/2026.eacl-long.280/
%P 5958-5970
Markdown (Informal)
[Mary, the Cheeseburger-Eating Vegetarian: Do LLMs Recognize Incoherence in Narratives?](https://aclanthology.org/2026.eacl-long.280/) (De Langis et al., EACL 2026)
ACL
Karin De Langis, Püren Öncel, Ryan Peters, Andrew Elfenbein, Laura Kristen Allen, Andreas Schramm, and Dongyeop Kang. 2026. Mary, the Cheeseburger-Eating Vegetarian: Do LLMs Recognize Incoherence in Narratives? In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 5958–5970, Rabat, Morocco. Association for Computational Linguistics.