@inproceedings{lovering-etal-2026-finding,
title = "On Finding Inconsistencies in Documents",
author = "Lovering, Charles and
Ebner, Seth and
Smock, Brandon and
Krumdick, Michael and
Rabbani, Muhammad Saad and
Muhammad, Ahmed and
Reddy, Varshini and
Tanner, Chris",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1675/",
pages = "33523--33564",
ISBN = "979-8-89176-395-1",
abstract = "Professionals in academia, law, and finance audit their documents because inconsistencies can result in monetary, reputational, and scientific costs. Language models (LMs) have the potential to dramatically speed up this auditing process. To understand their abilities, we introduce a benchmark, FIND (**F**inding **IN**consistencies in **D**ocuments), where each example is a document with an inconsistency inserted manually by a domain expert. Despite the documents being long, technical, and complex, the best-performing model ({`}gpt-5{'}) recovered 64{\%} of the inserted inconsistencies. Surprisingly, {`}gpt-5{'} also found inconsistencies already present in the original documents. For example, on 50 arXiv papers, we judged 136 out of 196 of the model{'}s suggestions to be legitimate inconsistencies missed by the original authors. However, despite these findings, even the best models miss almost half of the inconsistencies in FIND, demonstrating that inconsistency detection is still a challenging task."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lovering-etal-2026-finding">
<titleInfo>
<title>On Finding Inconsistencies in Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Charles</namePart>
<namePart type="family">Lovering</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Ebner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brandon</namePart>
<namePart type="family">Smock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Krumdick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="given">Saad</namePart>
<namePart type="family">Rabbani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Muhammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Varshini</namePart>
<namePart type="family">Reddy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Tanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Professionals in academia, law, and finance audit their documents because inconsistencies can result in monetary, reputational, and scientific costs. Language models (LMs) have the potential to dramatically speed up this auditing process. To understand their abilities, we introduce a benchmark, FIND (**F**inding **IN**consistencies in **D**ocuments), where each example is a document with an inconsistency inserted manually by a domain expert. Despite the documents being long, technical, and complex, the best-performing model (‘gpt-5’) recovered 64% of the inserted inconsistencies. Surprisingly, ‘gpt-5’ also found inconsistencies already present in the original documents. For example, on 50 arXiv papers, we judged 136 out of 196 of the model’s suggestions to be legitimate inconsistencies missed by the original authors. However, despite these findings, even the best models miss almost half of the inconsistencies in FIND, demonstrating that inconsistency detection is still a challenging task.</abstract>
<identifier type="citekey">lovering-etal-2026-finding</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1675/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>33523</start>
<end>33564</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Finding Inconsistencies in Documents
%A Lovering, Charles
%A Ebner, Seth
%A Smock, Brandon
%A Krumdick, Michael
%A Rabbani, Muhammad Saad
%A Muhammad, Ahmed
%A Reddy, Varshini
%A Tanner, Chris
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F lovering-etal-2026-finding
%X Professionals in academia, law, and finance audit their documents because inconsistencies can result in monetary, reputational, and scientific costs. Language models (LMs) have the potential to dramatically speed up this auditing process. To understand their abilities, we introduce a benchmark, FIND (**F**inding **IN**consistencies in **D**ocuments), where each example is a document with an inconsistency inserted manually by a domain expert. Despite the documents being long, technical, and complex, the best-performing model (‘gpt-5’) recovered 64% of the inserted inconsistencies. Surprisingly, ‘gpt-5’ also found inconsistencies already present in the original documents. For example, on 50 arXiv papers, we judged 136 out of 196 of the model’s suggestions to be legitimate inconsistencies missed by the original authors. However, despite these findings, even the best models miss almost half of the inconsistencies in FIND, demonstrating that inconsistency detection is still a challenging task.
%U https://aclanthology.org/2026.findings-acl.1675/
%P 33523-33564
Markdown (Informal)
[On Finding Inconsistencies in Documents](https://aclanthology.org/2026.findings-acl.1675/) (Lovering et al., Findings 2026)
ACL
- Charles Lovering, Seth Ebner, Brandon Smock, Michael Krumdick, Muhammad Saad Rabbani, Ahmed Muhammad, Varshini Reddy, and Chris Tanner. 2026. On Finding Inconsistencies in Documents. In Findings of the Association for Computational Linguistics: ACL 2026, pages 33523–33564, San Diego, California, United States. Association for Computational Linguistics.