@inproceedings{shen-chai-2025-graph,
title = "Graph-Augmented Open-Domain Multi-Document Summarization",
author = "Shen, Xiaoping and
Chai, Yekun",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-industry.27/",
pages = "318--330",
abstract = "In the open-domain multi-document summarization (ODMDS) task, retrieving relevant documents from large repositories and generating coherent summaries are crucial. However, existing methods often treat retrieval and summarization as separate tasks, neglecting the relationships among documents. To address these limitations, we propose an integrated retrieval-summarization framework that captures global document relationships through graph-based clustering, guiding the re-ranking of retrieved documents. This cluster-level thematic information is then used to guide large language models (LLMs) in refining the retrieved documents and generating more accurate, coherent summaries. Experimental results on the ODSUM benchmark demonstrate that our method significantly improves retrieval accuracy and produces summaries that surpass those derived from the oracle documents. These findings highlight the potential of our framework to improve both retrieval and summarization tasks in ODMDS."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shen-chai-2025-graph">
<titleInfo>
<title>Graph-Augmented Open-Domain Multi-Document Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaoping</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yekun</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apoorv</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the open-domain multi-document summarization (ODMDS) task, retrieving relevant documents from large repositories and generating coherent summaries are crucial. However, existing methods often treat retrieval and summarization as separate tasks, neglecting the relationships among documents. To address these limitations, we propose an integrated retrieval-summarization framework that captures global document relationships through graph-based clustering, guiding the re-ranking of retrieved documents. This cluster-level thematic information is then used to guide large language models (LLMs) in refining the retrieved documents and generating more accurate, coherent summaries. Experimental results on the ODSUM benchmark demonstrate that our method significantly improves retrieval accuracy and produces summaries that surpass those derived from the oracle documents. These findings highlight the potential of our framework to improve both retrieval and summarization tasks in ODMDS.</abstract>
<identifier type="citekey">shen-chai-2025-graph</identifier>
<location>
<url>https://aclanthology.org/2025.coling-industry.27/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>318</start>
<end>330</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Graph-Augmented Open-Domain Multi-Document Summarization
%A Shen, Xiaoping
%A Chai, Yekun
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%Y Darwish, Kareem
%Y Agarwal, Apoorv
%S Proceedings of the 31st International Conference on Computational Linguistics: Industry Track
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F shen-chai-2025-graph
%X In the open-domain multi-document summarization (ODMDS) task, retrieving relevant documents from large repositories and generating coherent summaries are crucial. However, existing methods often treat retrieval and summarization as separate tasks, neglecting the relationships among documents. To address these limitations, we propose an integrated retrieval-summarization framework that captures global document relationships through graph-based clustering, guiding the re-ranking of retrieved documents. This cluster-level thematic information is then used to guide large language models (LLMs) in refining the retrieved documents and generating more accurate, coherent summaries. Experimental results on the ODSUM benchmark demonstrate that our method significantly improves retrieval accuracy and produces summaries that surpass those derived from the oracle documents. These findings highlight the potential of our framework to improve both retrieval and summarization tasks in ODMDS.
%U https://aclanthology.org/2025.coling-industry.27/
%P 318-330
Markdown (Informal)
[Graph-Augmented Open-Domain Multi-Document Summarization](https://aclanthology.org/2025.coling-industry.27/) (Shen & Chai, COLING 2025)
ACL