@inproceedings{marinescu-etal-2025-factreasoner,
title = "{FactReasoner}: A Probabilistic Approach to Long-Form Factuality Assessment for Large Language Models",
author = "Marinescu, Radu and
Bhattacharjya, Debarun and
Lee, Junkyu and
Tchrakian, Tigran T. and
Carnerero-Cano, Javier and
Hou, Yufang and
Daly, Elizabeth M. and
Pascale, Alessandra",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.785/",
doi = "10.18653/v1/2025.findings-emnlp.785",
pages = "14547--14577",
isbn = "979-8-89176-335-7",
abstract = "Large language models (LLMs) have achieved remarkable success in generative tasks, yet they often fall short in ensuring the factual accuracy of their outputs thus limiting their reliability in real-world applications where correctness is critical. In this paper, we present FactReasoner, a novel neuro-symbolic based factuality assessment framework that employs probabilistic reasoning to evaluate the truthfulness of long-form generated responses. FactReasoner decomposes a response into atomic units, retrieves relevant contextual information from external knowledge sources, and models the logical relationships (e.g., entailment, contradiction) between these units and their contexts using probabilistic encodings. It then estimates the posterior probability that each atomic unit is supported by the retrieved evidence. Our experiments on both labeled and unlabeled benchmark datasets demonstrate that FactReasoner often outperforms state-of-the-art prompt-based methods in terms of factual precision and recall."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marinescu-etal-2025-factreasoner">
<titleInfo>
<title>FactReasoner: A Probabilistic Approach to Long-Form Factuality Assessment for Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Radu</namePart>
<namePart type="family">Marinescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debarun</namePart>
<namePart type="family">Bhattacharjya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junkyu</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tigran</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Tchrakian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Javier</namePart>
<namePart type="family">Carnerero-Cano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yufang</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Daly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandra</namePart>
<namePart type="family">Pascale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Large language models (LLMs) have achieved remarkable success in generative tasks, yet they often fall short in ensuring the factual accuracy of their outputs thus limiting their reliability in real-world applications where correctness is critical. In this paper, we present FactReasoner, a novel neuro-symbolic based factuality assessment framework that employs probabilistic reasoning to evaluate the truthfulness of long-form generated responses. FactReasoner decomposes a response into atomic units, retrieves relevant contextual information from external knowledge sources, and models the logical relationships (e.g., entailment, contradiction) between these units and their contexts using probabilistic encodings. It then estimates the posterior probability that each atomic unit is supported by the retrieved evidence. Our experiments on both labeled and unlabeled benchmark datasets demonstrate that FactReasoner often outperforms state-of-the-art prompt-based methods in terms of factual precision and recall.</abstract>
<identifier type="citekey">marinescu-etal-2025-factreasoner</identifier>
<identifier type="doi">10.18653/v1/2025.findings-emnlp.785</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.785/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>14547</start>
<end>14577</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FactReasoner: A Probabilistic Approach to Long-Form Factuality Assessment for Large Language Models
%A Marinescu, Radu
%A Bhattacharjya, Debarun
%A Lee, Junkyu
%A Tchrakian, Tigran T.
%A Carnerero-Cano, Javier
%A Hou, Yufang
%A Daly, Elizabeth M.
%A Pascale, Alessandra
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F marinescu-etal-2025-factreasoner
%X Large language models (LLMs) have achieved remarkable success in generative tasks, yet they often fall short in ensuring the factual accuracy of their outputs thus limiting their reliability in real-world applications where correctness is critical. In this paper, we present FactReasoner, a novel neuro-symbolic based factuality assessment framework that employs probabilistic reasoning to evaluate the truthfulness of long-form generated responses. FactReasoner decomposes a response into atomic units, retrieves relevant contextual information from external knowledge sources, and models the logical relationships (e.g., entailment, contradiction) between these units and their contexts using probabilistic encodings. It then estimates the posterior probability that each atomic unit is supported by the retrieved evidence. Our experiments on both labeled and unlabeled benchmark datasets demonstrate that FactReasoner often outperforms state-of-the-art prompt-based methods in terms of factual precision and recall.
%R 10.18653/v1/2025.findings-emnlp.785
%U https://aclanthology.org/2025.findings-emnlp.785/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.785
%P 14547-14577
Markdown (Informal)
[FactReasoner: A Probabilistic Approach to Long-Form Factuality Assessment for Large Language Models](https://aclanthology.org/2025.findings-emnlp.785/) (Marinescu et al., Findings 2025)
ACL
- Radu Marinescu, Debarun Bhattacharjya, Junkyu Lee, Tigran T. Tchrakian, Javier Carnerero-Cano, Yufang Hou, Elizabeth M. Daly, and Alessandra Pascale. 2025. FactReasoner: A Probabilistic Approach to Long-Form Factuality Assessment for Large Language Models. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 14547–14577, Suzhou, China. Association for Computational Linguistics.