@article{rosenthal-etal-2025-clapnq,
title = "{CLAP}nq: Cohesive Long-form Answers from Passages in Natural Questions for {RAG} systems",
author = "Rosenthal, Sara and
Sil, Avirup and
Florian, Radu and
Roukos, Salim",
journal = "Transactions of the Association for Computational Linguistics",
volume = "13",
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.tacl-1.3/",
doi = "10.1162/tacl_a_00729",
pages = "53--72",
abstract = "Retrieval Augmented Generation (RAG) has become a popular application for large language models. It is preferable that successful RAG systems provide accurate answers that are supported by being grounded in a passage without any hallucinations. While considerable work is required for building a full RAG pipeline, being able to benchmark performance is also necessary. We present CLAPnq, a benchmark Long-form Question Answering dataset for the full RAG pipeline. CLAPnq includes long answers with grounded gold passages from Natural Questions (NQ) and a corpus to perform either retrieval, generation, or the full RAG pipeline. The CLAPnq answers are concise, 3x smaller than the full passage, and cohesive, meaning that the answer is composed fluently, often by integrating multiple pieces of the passage that are not contiguous. RAG models must adapt to these properties to be successful at CLAPnq. We present baseline experiments and analysis for CLAPnq that highlight areas where there is still significant room for improvement in grounded RAG. CLAPnq is publicly available at https://github.com/primeqa/clapnq."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rosenthal-etal-2025-clapnq">
<titleInfo>
<title>CLAPnq: Cohesive Long-form Answers from Passages in Natural Questions for RAG systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Avirup</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radu</namePart>
<namePart type="family">Florian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salim</namePart>
<namePart type="family">Roukos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Retrieval Augmented Generation (RAG) has become a popular application for large language models. It is preferable that successful RAG systems provide accurate answers that are supported by being grounded in a passage without any hallucinations. While considerable work is required for building a full RAG pipeline, being able to benchmark performance is also necessary. We present CLAPnq, a benchmark Long-form Question Answering dataset for the full RAG pipeline. CLAPnq includes long answers with grounded gold passages from Natural Questions (NQ) and a corpus to perform either retrieval, generation, or the full RAG pipeline. The CLAPnq answers are concise, 3x smaller than the full passage, and cohesive, meaning that the answer is composed fluently, often by integrating multiple pieces of the passage that are not contiguous. RAG models must adapt to these properties to be successful at CLAPnq. We present baseline experiments and analysis for CLAPnq that highlight areas where there is still significant room for improvement in grounded RAG. CLAPnq is publicly available at https://github.com/primeqa/clapnq.</abstract>
<identifier type="citekey">rosenthal-etal-2025-clapnq</identifier>
<identifier type="doi">10.1162/tacl_a_00729</identifier>
<location>
<url>https://aclanthology.org/2025.tacl-1.3/</url>
</location>
<part>
<date>2025</date>
<detail type="volume"><number>13</number></detail>
<extent unit="page">
<start>53</start>
<end>72</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T CLAPnq: Cohesive Long-form Answers from Passages in Natural Questions for RAG systems
%A Rosenthal, Sara
%A Sil, Avirup
%A Florian, Radu
%A Roukos, Salim
%J Transactions of the Association for Computational Linguistics
%D 2025
%V 13
%I MIT Press
%C Cambridge, MA
%F rosenthal-etal-2025-clapnq
%X Retrieval Augmented Generation (RAG) has become a popular application for large language models. It is preferable that successful RAG systems provide accurate answers that are supported by being grounded in a passage without any hallucinations. While considerable work is required for building a full RAG pipeline, being able to benchmark performance is also necessary. We present CLAPnq, a benchmark Long-form Question Answering dataset for the full RAG pipeline. CLAPnq includes long answers with grounded gold passages from Natural Questions (NQ) and a corpus to perform either retrieval, generation, or the full RAG pipeline. The CLAPnq answers are concise, 3x smaller than the full passage, and cohesive, meaning that the answer is composed fluently, often by integrating multiple pieces of the passage that are not contiguous. RAG models must adapt to these properties to be successful at CLAPnq. We present baseline experiments and analysis for CLAPnq that highlight areas where there is still significant room for improvement in grounded RAG. CLAPnq is publicly available at https://github.com/primeqa/clapnq.
%R 10.1162/tacl_a_00729
%U https://aclanthology.org/2025.tacl-1.3/
%U https://doi.org/10.1162/tacl_a_00729
%P 53-72
Markdown (Informal)
[CLAPnq: Cohesive Long-form Answers from Passages in Natural Questions for RAG systems](https://aclanthology.org/2025.tacl-1.3/) (Rosenthal et al., TACL 2025)
ACL