@inproceedings{radevski-etal-2025-synthesizing,
title = "On Synthesizing Data for Context Attribution in Question Answering",
author = "Radevski, Gorjan and
Gashteovski, Kiril and
Syed, Shahbaz and
Malon, Christopher and
Nicolas, Sebastien and
Hung, Chia-Chien and
Sztyler, Timo and
Heu{\ss}er, Verena and
Ben Rim, Wiem and
Enomoto, Masafumi and
Takeoka, Kunihiro and
Oyamada, Masafumi and
Glava{\v{s}}, Goran and
Lawrence, Carolin",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.828/",
doi = "10.18653/v1/2025.acl-long.828",
pages = "16929--16950",
ISBN = "979-8-89176-251-0",
abstract = "Question Answering (QA) accounts for a significant portion of LLM usage ``in the wild''. However, LLMs sometimes produce false or misleading responses, also known as ``hallucinations''. Therefore, grounding the generated answers in contextually provided information{---}i.e., providing evidence for the generated text{---}is paramount for LLMs' trustworthiness. Providing this information is the task of context attribution. In this paper, we systematically study LLM-based approaches for this task, namely we investigate (i) zero-shot inference, (ii) LLM ensembling, and (iii) fine-tuning of small LMs on synthetic data generated by larger LLMs. Our key contribution is SynQA: a novel generative strategy for synthesizing context attribution data. Given selected context sentences, an LLM generates QA pairs that are supported by these sentences. This leverages LLMs' natural strengths in text generation while ensuring clear attribution paths in the synthetic training data. We show that the attribution data synthesized via SynQA is highly effective for fine-tuning small LMs for context attribution in different QA tasks and domains. Finally, with a user study, we validate the usefulness of small LMs (fine-tuned on synthetic data from SynQA) in context attribution for QA."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="radevski-etal-2025-synthesizing">
<titleInfo>
<title>On Synthesizing Data for Context Attribution in Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gorjan</namePart>
<namePart type="family">Radevski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiril</namePart>
<namePart type="family">Gashteovski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahbaz</namePart>
<namePart type="family">Syed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Malon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Nicolas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chia-Chien</namePart>
<namePart type="family">Hung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timo</namePart>
<namePart type="family">Sztyler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verena</namePart>
<namePart type="family">Heußer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wiem</namePart>
<namePart type="family">Ben Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masafumi</namePart>
<namePart type="family">Enomoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kunihiro</namePart>
<namePart type="family">Takeoka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masafumi</namePart>
<namePart type="family">Oyamada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Glavaš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolin</namePart>
<namePart type="family">Lawrence</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Question Answering (QA) accounts for a significant portion of LLM usage “in the wild”. However, LLMs sometimes produce false or misleading responses, also known as “hallucinations”. Therefore, grounding the generated answers in contextually provided information—i.e., providing evidence for the generated text—is paramount for LLMs’ trustworthiness. Providing this information is the task of context attribution. In this paper, we systematically study LLM-based approaches for this task, namely we investigate (i) zero-shot inference, (ii) LLM ensembling, and (iii) fine-tuning of small LMs on synthetic data generated by larger LLMs. Our key contribution is SynQA: a novel generative strategy for synthesizing context attribution data. Given selected context sentences, an LLM generates QA pairs that are supported by these sentences. This leverages LLMs’ natural strengths in text generation while ensuring clear attribution paths in the synthetic training data. We show that the attribution data synthesized via SynQA is highly effective for fine-tuning small LMs for context attribution in different QA tasks and domains. Finally, with a user study, we validate the usefulness of small LMs (fine-tuned on synthetic data from SynQA) in context attribution for QA.</abstract>
<identifier type="citekey">radevski-etal-2025-synthesizing</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.828</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.828/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>16929</start>
<end>16950</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Synthesizing Data for Context Attribution in Question Answering
%A Radevski, Gorjan
%A Gashteovski, Kiril
%A Syed, Shahbaz
%A Malon, Christopher
%A Nicolas, Sebastien
%A Hung, Chia-Chien
%A Sztyler, Timo
%A Heußer, Verena
%A Ben Rim, Wiem
%A Enomoto, Masafumi
%A Takeoka, Kunihiro
%A Oyamada, Masafumi
%A Glavaš, Goran
%A Lawrence, Carolin
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F radevski-etal-2025-synthesizing
%X Question Answering (QA) accounts for a significant portion of LLM usage “in the wild”. However, LLMs sometimes produce false or misleading responses, also known as “hallucinations”. Therefore, grounding the generated answers in contextually provided information—i.e., providing evidence for the generated text—is paramount for LLMs’ trustworthiness. Providing this information is the task of context attribution. In this paper, we systematically study LLM-based approaches for this task, namely we investigate (i) zero-shot inference, (ii) LLM ensembling, and (iii) fine-tuning of small LMs on synthetic data generated by larger LLMs. Our key contribution is SynQA: a novel generative strategy for synthesizing context attribution data. Given selected context sentences, an LLM generates QA pairs that are supported by these sentences. This leverages LLMs’ natural strengths in text generation while ensuring clear attribution paths in the synthetic training data. We show that the attribution data synthesized via SynQA is highly effective for fine-tuning small LMs for context attribution in different QA tasks and domains. Finally, with a user study, we validate the usefulness of small LMs (fine-tuned on synthetic data from SynQA) in context attribution for QA.
%R 10.18653/v1/2025.acl-long.828
%U https://aclanthology.org/2025.acl-long.828/
%U https://doi.org/10.18653/v1/2025.acl-long.828
%P 16929-16950
Markdown (Informal)
[On Synthesizing Data for Context Attribution in Question Answering](https://aclanthology.org/2025.acl-long.828/) (Radevski et al., ACL 2025)
ACL
- Gorjan Radevski, Kiril Gashteovski, Shahbaz Syed, Christopher Malon, Sebastien Nicolas, Chia-Chien Hung, Timo Sztyler, Verena Heußer, Wiem Ben Rim, Masafumi Enomoto, Kunihiro Takeoka, Masafumi Oyamada, Goran Glavaš, and Carolin Lawrence. 2025. On Synthesizing Data for Context Attribution in Question Answering. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 16929–16950, Vienna, Austria. Association for Computational Linguistics.