@inproceedings{ferguson-etal-2022-retrieval,
title = "Retrieval Data Augmentation Informed by Downstream Question Answering Performance",
author = "Ferguson, James and
Hajishirzi, Hannaneh and
Dasigi, Pradeep and
Khot, Tushar",
editor = "Aly, Rami and
Christodoulopoulos, Christos and
Cocarascu, Oana and
Guo, Zhijiang and
Mittal, Arpit and
Schlichtkrull, Michael and
Thorne, James and
Vlachos, Andreas",
booktitle = "Proceedings of the Fifth Fact Extraction and VERification Workshop (FEVER)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.fever-1.1",
doi = "10.18653/v1/2022.fever-1.1",
pages = "1--5",
abstract = "Training retrieval models to fetch contexts for Question Answering (QA) over large corpora requires labeling relevant passages in those corpora. Since obtaining exhaustive manual annotations of all relevant passages is not feasible, prior work uses text overlap heuristics to find passages that are likely to contain the answer, but this is not feasible when the task requires deeper reasoning and answers are not extractable spans (e.g.: multi-hop, discrete reasoning). We address this issue by identifying relevant passages based on whether they are useful for a trained QA model to arrive at the correct answers, and develop a search process guided by the QA model{'}s loss. Our experiments show that this approach enables identifying relevant context for unseen data greater than 90{\%} of the time on the IIRC dataset and generalizes better to the end QA task than those trained on just the gold retrieval data on IIRC and QASC datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ferguson-etal-2022-retrieval">
  <titleInfo>
    <title>Retrieval Data Augmentation Informed by Downstream Question Answering Performance</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">James</namePart>
    <namePart type="family">Ferguson</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Hannaneh</namePart>
    <namePart type="family">Hajishirzi</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Pradeep</namePart>
    <namePart type="family">Dasigi</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Tushar</namePart>
    <namePart type="family">Khot</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2022-05</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the Fifth Fact Extraction and VERification Workshop (FEVER)</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Rami</namePart>
      <namePart type="family">Aly</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christos</namePart>
      <namePart type="family">Christodoulopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Oana</namePart>
      <namePart type="family">Cocarascu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zhijiang</namePart>
      <namePart type="family">Guo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arpit</namePart>
      <namePart type="family">Mittal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michael</namePart>
      <namePart type="family">Schlichtkrull</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Thorne</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andreas</namePart>
      <namePart type="family">Vlachos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Dublin, Ireland</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Training retrieval models to fetch contexts for Question Answering (QA) over large corpora requires labeling relevant passages in those corpora. Since obtaining exhaustive manual annotations of all relevant passages is not feasible, prior work uses text overlap heuristics to find passages that are likely to contain the answer, but such heuristics break down when the task requires deeper reasoning and answers are not extractable spans (e.g., multi-hop or discrete reasoning). We address this issue by identifying relevant passages based on whether they are useful for a trained QA model to arrive at the correct answers, and develop a search process guided by the QA model’s loss. Our experiments show that this approach identifies relevant context for unseen data more than 90% of the time on the IIRC dataset, and that retrieval models trained on the augmented data generalize better to the end QA task than models trained on only the gold retrieval data, on both the IIRC and QASC datasets.</abstract>
<identifier type="citekey">ferguson-etal-2022-retrieval</identifier>
<identifier type="doi">10.18653/v1/2022.fever-1.1</identifier>
<location>
<url>https://aclanthology.org/2022.fever-1.1</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>1</start>
<end>5</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Retrieval Data Augmentation Informed by Downstream Question Answering Performance
%A Ferguson, James
%A Hajishirzi, Hannaneh
%A Dasigi, Pradeep
%A Khot, Tushar
%Y Aly, Rami
%Y Christodoulopoulos, Christos
%Y Cocarascu, Oana
%Y Guo, Zhijiang
%Y Mittal, Arpit
%Y Schlichtkrull, Michael
%Y Thorne, James
%Y Vlachos, Andreas
%S Proceedings of the Fifth Fact Extraction and VERification Workshop (FEVER)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F ferguson-etal-2022-retrieval
%X Training retrieval models to fetch contexts for Question Answering (QA) over large corpora requires labeling relevant passages in those corpora. Since obtaining exhaustive manual annotations of all relevant passages is not feasible, prior work uses text overlap heuristics to find passages that are likely to contain the answer, but such heuristics break down when the task requires deeper reasoning and answers are not extractable spans (e.g., multi-hop or discrete reasoning). We address this issue by identifying relevant passages based on whether they are useful for a trained QA model to arrive at the correct answers, and develop a search process guided by the QA model’s loss. Our experiments show that this approach identifies relevant context for unseen data more than 90% of the time on the IIRC dataset, and that retrieval models trained on the augmented data generalize better to the end QA task than models trained on only the gold retrieval data, on both the IIRC and QASC datasets.
%R 10.18653/v1/2022.fever-1.1
%U https://aclanthology.org/2022.fever-1.1
%U https://doi.org/10.18653/v1/2022.fever-1.1
%P 1-5
Markdown (Informal)
[Retrieval Data Augmentation Informed by Downstream Question Answering Performance](https://aclanthology.org/2022.fever-1.1) (Ferguson et al., FEVER 2022)
ACL
James Ferguson, Hannaneh Hajishirzi, Pradeep Dasigi, and Tushar Khot. 2022. Retrieval Data Augmentation Informed by Downstream Question Answering Performance. In Proceedings of the Fifth Fact Extraction and VERification Workshop (FEVER), pages 1–5, Dublin, Ireland. Association for Computational Linguistics.