@inproceedings{babakov-bugarin-diz-2026-decomposition,
title = "Decomposition Does Not Help: Evidence from Semantic Clustering in {LLM}-based Causal Graph Discovery",
author = "Babakov, Nikolay and
Bugar{\'i}n-Diz, Alberto",
editor = "Mahamood, Saad and
Howcroft, David M. and
van Deemter, Kees and
Balloccu, Simone and
Sivaprasad, Adarsa and
Sundararajan, Barkavi and
Bugar{\'i}n Diz, Alberto and
Alonso-Moral, Jose Mar{\'i}a",
booktitle = "Proceedings of the 1st Symposium on Natural Language Generation Evaluations",
month = jun,
year = "2026",
address = "Aberdeen, United Kingdom",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.retroeval-main.1/",
pages = "1--7",
ISBN = "979-8-89176-436-1",
abstract = "Recent advances in large language models (LLMs) have enabled their application to non-traditional tasks such as causal graph construction, a key component of reasoning frameworks, including Bayesian Networks. The most effective existing approaches rely on direct prompting, where an LLM generates a complete graph from a full set of variables in a single step. However, the performance of such methods degrades as the number of graph nodes increases. To address this limitation, we explore a divide-and-conquer alternative based on semantic clustering. Node representations are first embedded and clustered, after which subgraphs are constructed independently for each cluster using LLM prompting. The resulting subgraphs are then merged pairwise into a global graph. Contrary to our expectations, this approach leads to a substantial degradation in performance compared to direct prompting baselines, as measured by Structural Hamming Distance (SHD). We attribute this to the misalignment between semantic similarity and causal structure, as well as error propagation during subgraph merging. We report these negative results to highlight the limitations of decomposition strategies in LLM-based causal graphs construction."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="babakov-bugarin-diz-2026-decomposition">
<titleInfo>
<title>Decomposition Does Not Help: Evidence from Semantic Clustering in LLM-based Causal Graph Discovery</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolay</namePart>
<namePart type="family">Babakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Bugarín-Diz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Symposium on Natural Language Generation Evaluations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Mahamood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Howcroft</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kees</namePart>
<namePart type="family">van Deemter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Balloccu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adarsa</namePart>
<namePart type="family">Sivaprasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barkavi</namePart>
<namePart type="family">Sundararajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Bugarín Diz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="given">María</namePart>
<namePart type="family">Alonso-Moral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Aberdeen, United Kingdom</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-436-1</identifier>
</relatedItem>
<abstract>Recent advances in large language models (LLMs) have enabled their application to non-traditional tasks such as causal graph construction, a key component of reasoning frameworks, including Bayesian Networks. The most effective existing approaches rely on direct prompting, where an LLM generates a complete graph from a full set of variables in a single step. However, the performance of such methods degrades as the number of graph nodes increases. To address this limitation, we explore a divide-and-conquer alternative based on semantic clustering. Node representations are first embedded and clustered, after which subgraphs are constructed independently for each cluster using LLM prompting. The resulting subgraphs are then merged pairwise into a global graph. Contrary to our expectations, this approach leads to a substantial degradation in performance compared to direct prompting baselines, as measured by Structural Hamming Distance (SHD). We attribute this to the misalignment between semantic similarity and causal structure, as well as error propagation during subgraph merging. We report these negative results to highlight the limitations of decomposition strategies in LLM-based causal graphs construction.</abstract>
<identifier type="citekey">babakov-bugarin-diz-2026-decomposition</identifier>
<location>
<url>https://aclanthology.org/2026.retroeval-main.1/</url>
</location>
<part>
<date>2026-06</date>
<extent unit="page">
<start>1</start>
<end>7</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Decomposition Does Not Help: Evidence from Semantic Clustering in LLM-based Causal Graph Discovery
%A Babakov, Nikolay
%A Bugarín-Diz, Alberto
%Y Mahamood, Saad
%Y Howcroft, David M.
%Y van Deemter, Kees
%Y Balloccu, Simone
%Y Sivaprasad, Adarsa
%Y Sundararajan, Barkavi
%Y Bugarín Diz, Alberto
%Y Alonso-Moral, Jose María
%S Proceedings of the 1st Symposium on Natural Language Generation Evaluations
%D 2026
%8 June
%I Association for Computational Linguistics
%C Aberdeen, United Kingdom
%@ 979-8-89176-436-1
%F babakov-bugarin-diz-2026-decomposition
%X Recent advances in large language models (LLMs) have enabled their application to non-traditional tasks such as causal graph construction, a key component of reasoning frameworks, including Bayesian Networks. The most effective existing approaches rely on direct prompting, where an LLM generates a complete graph from a full set of variables in a single step. However, the performance of such methods degrades as the number of graph nodes increases. To address this limitation, we explore a divide-and-conquer alternative based on semantic clustering. Node representations are first embedded and clustered, after which subgraphs are constructed independently for each cluster using LLM prompting. The resulting subgraphs are then merged pairwise into a global graph. Contrary to our expectations, this approach leads to a substantial degradation in performance compared to direct prompting baselines, as measured by Structural Hamming Distance (SHD). We attribute this to the misalignment between semantic similarity and causal structure, as well as error propagation during subgraph merging. We report these negative results to highlight the limitations of decomposition strategies in LLM-based causal graphs construction.
%U https://aclanthology.org/2026.retroeval-main.1/
%P 1-7
Markdown (Informal)
[Decomposition Does Not Help: Evidence from Semantic Clustering in LLM-based Causal Graph Discovery](https://aclanthology.org/2026.retroeval-main.1/) (Babakov & Bugarín-Diz, RetroEval 2026)
ACL