@inproceedings{shastry-etal-2025-entailment,
title = "Entailment Progressions: {A} Robust Approach to Evaluating Reasoning Within Larger Discourse",
author = "Shastry, Rishabh and
Chiril, Patricia and
Charney, Joshua and
Uminsky, David",
editor = "Johansson, Richard and
Stymne, Sara",
booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
month = mar,
year = "2025",
address = "Tallinn, Estonia",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2025.nodalida-1.66/",
pages = "651--660",
ISBN = "978-9908-53-109-0",
abstract = "Textual entailment, or the ability to deduce whether a proposed hypothesis is logically supported by a given premise, has historically been applied to the evaluation of language modelling efficiency in tasks like question answering and text summarization. However, we hypothesize that these zero-shot entailment evaluations can be extended to the task of evaluating discourse within larger textual narratives. In this paper, we propose a simple but effective method that sequentially evaluates changes in textual entailment between sentences within a larger text, in an approach we denote as ``Entailment Progressions''. These entailment progressions aim to capture the inference relations between sentences as an underlying component capable of distinguishing texts generated from various models and procedures. Our results suggest that entailment progressions can be used to effectively distinguish between machine-generated and human-authored texts across multiple established benchmark corpora and our own EP4MGT dataset. Additionally, our method displays robustness in performance when evaluated on paraphrased texts a technique that has historically affected the performance of well-established metrics when distinguishing between machine generated and human authored texts."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shastry-etal-2025-entailment">
<titleInfo>
<title>Entailment Progressions: A Robust Approach to Evaluating Reasoning Within Larger Discourse</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rishabh</namePart>
<namePart type="family">Shastry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patricia</namePart>
<namePart type="family">Chiril</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Charney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Uminsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>Textual entailment, or the ability to deduce whether a proposed hypothesis is logically supported by a given premise, has historically been applied to the evaluation of language modelling efficiency in tasks like question answering and text summarization. However, we hypothesize that these zero-shot entailment evaluations can be extended to the task of evaluating discourse within larger textual narratives. In this paper, we propose a simple but effective method that sequentially evaluates changes in textual entailment between sentences within a larger text, in an approach we denote as “Entailment Progressions”. These entailment progressions aim to capture the inference relations between sentences as an underlying component capable of distinguishing texts generated from various models and procedures. Our results suggest that entailment progressions can be used to effectively distinguish between machine-generated and human-authored texts across multiple established benchmark corpora and our own EP4MGT dataset. Additionally, our method displays robustness in performance when evaluated on paraphrased texts a technique that has historically affected the performance of well-established metrics when distinguishing between machine generated and human authored texts.</abstract>
<identifier type="citekey">shastry-etal-2025-entailment</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.66/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>651</start>
<end>660</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Entailment Progressions: A Robust Approach to Evaluating Reasoning Within Larger Discourse
%A Shastry, Rishabh
%A Chiril, Patricia
%A Charney, Joshua
%A Uminsky, David
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F shastry-etal-2025-entailment
%X Textual entailment, or the ability to deduce whether a proposed hypothesis is logically supported by a given premise, has historically been applied to the evaluation of language modelling efficiency in tasks like question answering and text summarization. However, we hypothesize that these zero-shot entailment evaluations can be extended to the task of evaluating discourse within larger textual narratives. In this paper, we propose a simple but effective method that sequentially evaluates changes in textual entailment between sentences within a larger text, in an approach we denote as “Entailment Progressions”. These entailment progressions aim to capture the inference relations between sentences as an underlying component capable of distinguishing texts generated from various models and procedures. Our results suggest that entailment progressions can be used to effectively distinguish between machine-generated and human-authored texts across multiple established benchmark corpora and our own EP4MGT dataset. Additionally, our method displays robustness in performance when evaluated on paraphrased texts a technique that has historically affected the performance of well-established metrics when distinguishing between machine generated and human authored texts.
%U https://aclanthology.org/2025.nodalida-1.66/
%P 651-660
Markdown (Informal)
[Entailment Progressions: A Robust Approach to Evaluating Reasoning Within Larger Discourse](https://aclanthology.org/2025.nodalida-1.66/) (Shastry et al., NoDaLiDa 2025)
ACL