@inproceedings{nair-etal-2023-drilling,
title = "Drilling Down into the Discourse Structure with {LLM}s for Long Document Question Answering",
author = "Nair, Inderjeet and
Somasundaram, Shwetha and
Saxena, Apoorv and
Goswami, Koustava",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.972",
doi = "10.18653/v1/2023.findings-emnlp.972",
pages = "14593--14606",
abstract = "We address the task of evidence retrieval for long document question answering, which involves locating relevant paragraphs within a document to answer a question. We aim to assess the applicability of large language models (LLMs) in the task of zero-shot long document evidence retrieval, owing to their unprecedented performance across various NLP tasks. However, currently the LLMs can consume limited context lengths as input, thus providing document chunks as inputs might overlook the global context while missing out on capturing the inter-segment dependencies. Moreover, directly feeding the large input sets can incur significant computational costs, particularly when processing the entire document (and potentially incurring monetary expenses with enterprise APIs like OpenAI{'}s GPT variants). To address these challenges, we propose a suite of techniques that exploit the discourse structure commonly found in documents. By utilizing this structure, we create a condensed representation of the document, enabling a more comprehensive understanding and analysis of relationships between different parts. We retain 99.6{\%} of the best zero-shot approach{'}s performance, while processing only 26{\%} of the total tokens used by the best approach in the information seeking evidence retrieval setup. We also show how our approach can be combined with *self-ask* reasoning agent to achieve best zero-shot performance in complex multi-hop question answering, just $\approx 4${\%} short of zero-shot performance using gold evidence.",
}
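For reference, here is a minimal LaTeX sketch showing how this entry might be cited, assuming the BibTeX record above is saved in a hypothetical references.bib and the document is compiled with BibTeX and natbib (illustrative only, not part of the original record):

% Minimal citation example (illustrative sketch).
% Assumes the BibTeX entry above is stored in references.bib.
\documentclass{article}
\usepackage[numbers]{natbib}  % assumption: natbib with numeric citations

\begin{document}
Discourse-structure-guided evidence retrieval for long-document QA is
explored by \citet{nair-etal-2023-drilling}.

\bibliographystyle{plainnat}
\bibliography{references}  % references.bib contains the entry above
\end{document}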