@inproceedings{thorne-2022-data,
title = "Data-Efficient Auto-Regressive Document Retrieval for Fact Verification",
author = "Thorne, James",
editor = {Fan, Angela and
Gurevych, Iryna and
Hou, Yufang and
Kozareva, Zornitsa and
Luccioni, Sasha and
Sadat Moosavi, Nafise and
Ravi, Sujith and
Kim, Gyuwan and
Schwartz, Roy and
R{\"u}ckl{\'e}, Andreas},
booktitle = "Proceedings of The Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sustainlp-1.7",
doi = "10.18653/v1/2022.sustainlp-1.7",
pages = "44--51",
abstract = "Document retrieval is a core component of many knowledge-intensive natural language processing task formulations such as fact verification. Sources of textual knowledge such as Wikipedia articles condition the generation of answers from the models. Recent advances in retrieval use sequence-to-sequence models to incrementally predict the title of the appropriate Wikipedia page given an input instance. However, this method requires supervision in the form of human annotation to label which Wikipedia pages contain appropriate context. This paper introduces a distant-supervision method that does not require any annotation train auto-regressive retrievers that attain competitive R-Precision and Recall in a zero-shot setting. Furthermore we show that with task-specific supervised fine-tuning, auto-regressive retrieval performance for two Wikipedia-based fact verification tasks can approach or even exceed full supervision using less than $1/4$ of the annotated data. We release all code and models",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thorne-2022-data">
<titleInfo>
<title>Data-Efficient Auto-Regressive Document Retrieval for Fact Verification</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yufang</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sasha</namePart>
<namePart type="family">Luccioni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="family">Sadat Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujith</namePart>
<namePart type="family">Ravi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gyuwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roy</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Document retrieval is a core component of many knowledge-intensive natural language processing task formulations such as fact verification. Sources of textual knowledge such as Wikipedia articles condition the generation of answers from the models. Recent advances in retrieval use sequence-to-sequence models to incrementally predict the title of the appropriate Wikipedia page given an input instance. However, this method requires supervision in the form of human annotation to label which Wikipedia pages contain appropriate context. This paper introduces a distant-supervision method that does not require any annotation train auto-regressive retrievers that attain competitive R-Precision and Recall in a zero-shot setting. Furthermore we show that with task-specific supervised fine-tuning, auto-regressive retrieval performance for two Wikipedia-based fact verification tasks can approach or even exceed full supervision using less than 1/4 of the annotated data. We release all code and models</abstract>
<identifier type="citekey">thorne-2022-data</identifier>
<identifier type="doi">10.18653/v1/2022.sustainlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.sustainlp-1.7</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>44</start>
<end>51</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-Efficient Auto-Regressive Document Retrieval for Fact Verification
%A Thorne, James
%Y Fan, Angela
%Y Gurevych, Iryna
%Y Hou, Yufang
%Y Kozareva, Zornitsa
%Y Luccioni, Sasha
%Y Sadat Moosavi, Nafise
%Y Ravi, Sujith
%Y Kim, Gyuwan
%Y Schwartz, Roy
%Y Rücklé, Andreas
%S Proceedings of The Third Workshop on Simple and Efficient Natural Language Processing (SustaiNLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F thorne-2022-data
%X Document retrieval is a core component of many knowledge-intensive natural language processing task formulations such as fact verification. Sources of textual knowledge such as Wikipedia articles condition the generation of answers from the models. Recent advances in retrieval use sequence-to-sequence models to incrementally predict the title of the appropriate Wikipedia page given an input instance. However, this method requires supervision in the form of human annotation to label which Wikipedia pages contain appropriate context. This paper introduces a distant-supervision method that does not require any annotation train auto-regressive retrievers that attain competitive R-Precision and Recall in a zero-shot setting. Furthermore we show that with task-specific supervised fine-tuning, auto-regressive retrieval performance for two Wikipedia-based fact verification tasks can approach or even exceed full supervision using less than 1/4 of the annotated data. We release all code and models
%R 10.18653/v1/2022.sustainlp-1.7
%U https://aclanthology.org/2022.sustainlp-1.7
%U https://doi.org/10.18653/v1/2022.sustainlp-1.7
%P 44-51
Markdown (Informal)
[Data-Efficient Auto-Regressive Document Retrieval for Fact Verification](https://aclanthology.org/2022.sustainlp-1.7) (Thorne, sustainlp 2022)
ACL