@inproceedings{mass-etal-2024-bang,
title = "More Bang for your Context: Virtual Documents for Question Answering over Long Documents",
author = "Mass, Yosi and
Carmeli, Boaz and
Yehudai, Asaf and
Toledo, Assaf and
Mills, Nathaniel",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.757",
pages = "12936--12942",
abstract = "We deal with the problem of Question Answering (QA) over a long document, which poses a challenge for modern Large Language Models (LLMs). Although LLMs can handle increasingly longer context windows, they struggle to effectively utilize the long content. To address this issue, we introduce the concept of a virtual document (VDoc). A VDoc is created by selecting chunks from the original document that are most likely to contain the information needed to answer the user{'}s question, while ensuring they fit within the LLM{'}s context window. We hypothesize that providing a short and focused VDoc to the LLM is more effective than filling the entire context window with less relevant information. Our experiments confirm this hypothesis and demonstrate that using VDocs improves results on the QA task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mass-etal-2024-bang">
<titleInfo>
<title>More Bang for your Context: Virtual Documents for Question Answering over Long Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yosi</namePart>
<namePart type="family">Mass</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Boaz</namePart>
<namePart type="family">Carmeli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asaf</namePart>
<namePart type="family">Yehudai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Assaf</namePart>
<namePart type="family">Toledo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nathaniel</namePart>
<namePart type="family">Mills</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We deal with the problem of Question Answering (QA) over a long document, which poses a challenge for modern Large Language Models (LLMs). Although LLMs can handle increasingly longer context windows, they struggle to effectively utilize the long content. To address this issue, we introduce the concept of a virtual document (VDoc). A VDoc is created by selecting chunks from the original document that are most likely to contain the information needed to answer the user’s question, while ensuring they fit within the LLM’s context window. We hypothesize that providing a short and focused VDoc to the LLM is more effective than filling the entire context window with less relevant information. Our experiments confirm this hypothesis and demonstrate that using VDocs improves results on the QA task.</abstract>
<identifier type="citekey">mass-etal-2024-bang</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.757</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>12936</start>
<end>12942</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T More Bang for your Context: Virtual Documents for Question Answering over Long Documents
%A Mass, Yosi
%A Carmeli, Boaz
%A Yehudai, Asaf
%A Toledo, Assaf
%A Mills, Nathaniel
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F mass-etal-2024-bang
%X We deal with the problem of Question Answering (QA) over a long document, which poses a challenge for modern Large Language Models (LLMs). Although LLMs can handle increasingly longer context windows, they struggle to effectively utilize the long content. To address this issue, we introduce the concept of a virtual document (VDoc). A VDoc is created by selecting chunks from the original document that are most likely to contain the information needed to answer the user’s question, while ensuring they fit within the LLM’s context window. We hypothesize that providing a short and focused VDoc to the LLM is more effective than filling the entire context window with less relevant information. Our experiments confirm this hypothesis and demonstrate that using VDocs improves results on the QA task.
%U https://aclanthology.org/2024.findings-emnlp.757
%P 12936-12942
Markdown (Informal)
[More Bang for your Context: Virtual Documents for Question Answering over Long Documents](https://aclanthology.org/2024.findings-emnlp.757) (Mass et al., Findings 2024)
ACL