@inproceedings{wanner-etal-2024-closer,
title = "A Closer Look at Claim Decomposition",
author = "Wanner, Miriam and
Ebner, Seth and
Jiang, Zhengping and
Dredze, Mark and
Van Durme, Benjamin",
editor = "Bollegala, Danushka and
Shwartz, Vered",
booktitle = "Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.starsem-1.13",
doi = "10.18653/v1/2024.starsem-1.13",
pages = "153--175",
abstract = "As generated text becomes more commonplace, it is increasingly important to evaluate how well-supported such text is by external knowledge sources. Many approaches for evaluating textual support rely on some method for decomposing text into its individual subclaims which are scored against a trusted reference. We investigate how various methods of claim decomposition{---}especially LLM-based methods{---}affect the result of an evaluation approach such as the recently proposed FActScore, finding that it is sensitive to the decomposition method used. This sensitivity arises because such metrics attribute overall textual support to the model that generated the text even though error can also come from the metric{'}s decomposition step. To measure decomposition quality, we introduce an adaptation of FActScore, which we call DecompScore. We then propose an LLM-based approach to generating decompositions inspired by Bertrand Russell{'}s theory of logical atomism and neo-Davidsonian semantics and demonstrate its improved decomposition quality over previous methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wanner-etal-2024-closer">
<titleInfo>
<title>A Closer Look at Claim Decomposition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Ebner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengping</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dredze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Van Durme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danushka</namePart>
<namePart type="family">Bollegala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>As generated text becomes more commonplace, it is increasingly important to evaluate how well-supported such text is by external knowledge sources. Many approaches for evaluating textual support rely on some method for decomposing text into its individual subclaims which are scored against a trusted reference. We investigate how various methods of claim decomposition—especially LLM-based methods—affect the result of an evaluation approach such as the recently proposed FActScore, finding that it is sensitive to the decomposition method used. This sensitivity arises because such metrics attribute overall textual support to the model that generated the text even though error can also come from the metric’s decomposition step. To measure decomposition quality, we introduce an adaptation of FActScore, which we call DecompScore. We then propose an LLM-based approach to generating decompositions inspired by Bertrand Russell’s theory of logical atomism and neo-Davidsonian semantics and demonstrate its improved decomposition quality over previous methods.</abstract>
<identifier type="citekey">wanner-etal-2024-closer</identifier>
<identifier type="doi">10.18653/v1/2024.starsem-1.13</identifier>
<location>
<url>https://aclanthology.org/2024.starsem-1.13</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>153</start>
<end>175</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Closer Look at Claim Decomposition
%A Wanner, Miriam
%A Ebner, Seth
%A Jiang, Zhengping
%A Dredze, Mark
%A Van Durme, Benjamin
%Y Bollegala, Danushka
%Y Shwartz, Vered
%S Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F wanner-etal-2024-closer
%X As generated text becomes more commonplace, it is increasingly important to evaluate how well-supported such text is by external knowledge sources. Many approaches for evaluating textual support rely on some method for decomposing text into its individual subclaims which are scored against a trusted reference. We investigate how various methods of claim decomposition—especially LLM-based methods—affect the result of an evaluation approach such as the recently proposed FActScore, finding that it is sensitive to the decomposition method used. This sensitivity arises because such metrics attribute overall textual support to the model that generated the text even though error can also come from the metric’s decomposition step. To measure decomposition quality, we introduce an adaptation of FActScore, which we call DecompScore. We then propose an LLM-based approach to generating decompositions inspired by Bertrand Russell’s theory of logical atomism and neo-Davidsonian semantics and demonstrate its improved decomposition quality over previous methods.
%R 10.18653/v1/2024.starsem-1.13
%U https://aclanthology.org/2024.starsem-1.13
%U https://doi.org/10.18653/v1/2024.starsem-1.13
%P 153-175
Markdown (Informal)
[A Closer Look at Claim Decomposition](https://aclanthology.org/2024.starsem-1.13) (Wanner et al., *SEM 2024)
ACL
- Miriam Wanner, Seth Ebner, Zhengping Jiang, Mark Dredze, and Benjamin Van Durme. 2024. A Closer Look at Claim Decomposition. In Proceedings of the 13th Joint Conference on Lexical and Computational Semantics (*SEM 2024), pages 153–175, Mexico City, Mexico. Association for Computational Linguistics.