% Conference paper by Same, Mahamood and Kamath (2026), pages 39--52 of the
% proceedings named in `booktitle`. Accented editor names use LaTeX escapes
% wrapped in braces so they remain ASCII-safe.
@inproceedings{same-etal-2026-comparative,
  title     = {A Comparative Evaluation of End-to-End and Pipeline Approaches for Summarisation},
  author    = {Same, Fahime and
               Mahamood, Saad and
               Kamath, Srinivas Ramesh},
  editor    = {Mahamood, Saad and
               Howcroft, David M. and
               van Deemter, Kees and
               Balloccu, Simone and
               Sivaprasad, Adarsa and
               Sundararajan, Barkavi and
               Bugar{\'i}n Diz, Alberto and
               Alonso-Moral, Jose Mar{\'i}a},
  booktitle = {Proceedings of the 1st Symposium on Natural Language Generation Evaluations},
  month     = jun,
  year      = {2026},
  address   = {Aberdeen, United Kingdom},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.retroeval-main.6/},
  pages     = {39--52},
  isbn      = {979-8-89176-436-1},
  abstract  = {We describe and evaluate two different architectures for creating book highlights from unstructured data. Given the prevalence of large language models, we examine whether a pipeline-based approach with intermediate steps for text generation is still necessary and whether it continues to offer any benefits over an end-to-end approach. Our comparative evaluations using LLM-as-a-judge across multiple models with different parameter sizes and generation scenarios show that highlights generated by the end-to-end approach are preferred. However, there is a slight but consistent increase in faithfulness for the pipeline-generated highlights when generating at a thematic level. Additionally, our analysis across multiple models shows that while larger models are more faithful, the degree of faithfulness increases when they are used with a pipeline architecture. The findings from our work indicate that whilst there is comparability between the two approaches, the greater faithfulness, controllability, and observability of pipeline-based approaches offer tangible benefits in applied settings.}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey same-etal-2026-comparative: three authors,
     a host proceedings item with eight editors, and page extent 39 to 52. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="same-etal-2026-comparative">
    <titleInfo>
      <title>A Comparative Evaluation of End-to-End and Pipeline Approaches for Summarisation</title>
    </titleInfo>
    <!-- Authors of the paper, in publication order -->
    <name type="personal">
      <namePart type="given">Fahime</namePart>
      <namePart type="family">Same</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saad</namePart>
      <namePart type="family">Mahamood</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Srinivas</namePart>
      <namePart type="given">Ramesh</namePart>
      <namePart type="family">Kamath</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Symposium on Natural Language Generation Evaluations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Saad</namePart>
        <namePart type="family">Mahamood</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="given">M</namePart>
        <namePart type="family">Howcroft</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kees</namePart>
        <namePart type="family">van Deemter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simone</namePart>
        <namePart type="family">Balloccu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Adarsa</namePart>
        <namePart type="family">Sivaprasad</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Barkavi</namePart>
        <namePart type="family">Sundararajan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alberto</namePart>
        <namePart type="family">Bugarín Diz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jose</namePart>
        <namePart type="given">María</namePart>
        <namePart type="family">Alonso-Moral</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Aberdeen, United Kingdom</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-436-1</identifier>
    </relatedItem>
    <abstract>We describe and evaluate two different architectures for creating book highlights from unstructured data. Given the prevalence of large language models, we examine whether a pipeline-based approach with intermediate steps for text generation is still necessary and whether it continues to offer any benefits over an end-to-end approach. Our comparative evaluations using LLM-as-a-judge across multiple models with different parameter sizes and generation scenarios show that highlights generated by the end-to-end approach are preferred. However, there is a slight but consistent increase in faithfulness for the pipeline-generated highlights when generating at a thematic level. Additionally, our analysis across multiple models shows that while larger models are more faithful, the degree of faithfulness increases when they are used with a pipeline architecture. The findings from our work indicate that whilst there is comparability between the two approaches, the greater faithfulness, controllability, and observability of pipeline-based approaches offer tangible benefits in applied settings.</abstract>
    <identifier type="citekey">same-etal-2026-comparative</identifier>
    <location>
      <url>https://aclanthology.org/2026.retroeval-main.6/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2026-06</date>
      <extent unit="page">
        <start>39</start>
        <end>52</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Comparative Evaluation of End-to-End and Pipeline Approaches for Summarisation
%A Same, Fahime
%A Mahamood, Saad
%A Kamath, Srinivas Ramesh
%Y Mahamood, Saad
%Y Howcroft, David M.
%Y van Deemter, Kees
%Y Balloccu, Simone
%Y Sivaprasad, Adarsa
%Y Sundararajan, Barkavi
%Y Bugarín Diz, Alberto
%Y Alonso-Moral, Jose María
%S Proceedings of the 1st Symposium on Natural Language Generation Evaluations
%D 2026
%8 June
%I Association for Computational Linguistics
%C Aberdeen, United Kingdom
%@ 979-8-89176-436-1
%F same-etal-2026-comparative
%X We describe and evaluate two different architectures for creating book highlights from unstructured data. Given the prevalence of large language models, we examine whether a pipeline-based approach with intermediate steps for text generation is still necessary and whether it continues to offer any benefits over an end-to-end approach. Our comparative evaluations using LLM-as-a-judge across multiple models with different parameter sizes and generation scenarios show that highlights generated by the end-to-end approach are preferred. However, there is a slight but consistent increase in faithfulness for the pipeline-generated highlights when generating at a thematic level. Additionally, our analysis across multiple models shows that while larger models are more faithful, the degree of faithfulness increases when they are used with a pipeline architecture. The findings from our work indicate that whilst there is comparability between the two approaches, the greater faithfulness, controllability, and observability of pipeline-based approaches offer tangible benefits in applied settings.
%U https://aclanthology.org/2026.retroeval-main.6/
%P 39-52
Markdown (Informal)
[A Comparative Evaluation of End-to-End and Pipeline Approaches for Summarisation](https://aclanthology.org/2026.retroeval-main.6/) (Same et al., RetroEval 2026)
ACL