@article{khattab-etal-2021-relevance,
title = "Relevance-guided Supervision for {O}pen{QA} with {C}ol{BERT}",
author = "Khattab, Omar and
Potts, Christopher and
Zaharia, Matei",
editor = "Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "9",
year = "2021",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2021.tacl-1.55",
doi = "10.1162/tacl_a_00405",
pages = "929--944",
abstract = "Systems for Open-Domain Question Answering (OpenQA) generally depend on a retriever for finding candidate passages in a large corpus and a reader for extracting answers from those passages. In much recent work, the retriever is a learned component that uses coarse-grained vector representations of questions and passages. We argue that this modeling choice is insufficiently expressive for dealing with the complexity of natural language questions. To address this, we define ColBERT-QA, which adapts the scalable neural retrieval model ColBERT to OpenQA. ColBERT creates fine-grained interactions between questions and passages. We propose an efficient weak supervision strategy that iteratively uses ColBERT to create its own training data. This greatly improves OpenQA retrieval on Natural Questions, SQuAD, and TriviaQA, and the resulting system attains state-of-the-art extractive OpenQA performance on all three datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khattab-etal-2021-relevance">
<titleInfo>
<title>Relevance-guided Supervision for OpenQA with ColBERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Omar</namePart>
<namePart type="family">Khattab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Potts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matei</namePart>
<namePart type="family">Zaharia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Systems for Open-Domain Question Answering (OpenQA) generally depend on a retriever for finding candidate passages in a large corpus and a reader for extracting answers from those passages. In much recent work, the retriever is a learned component that uses coarse-grained vector representations of questions and passages. We argue that this modeling choice is insufficiently expressive for dealing with the complexity of natural language questions. To address this, we define ColBERT-QA, which adapts the scalable neural retrieval model ColBERT to OpenQA. ColBERT creates fine-grained interactions between questions and passages. We propose an efficient weak supervision strategy that iteratively uses ColBERT to create its own training data. This greatly improves OpenQA retrieval on Natural Questions, SQuAD, and TriviaQA, and the resulting system attains state-of-the-art extractive OpenQA performance on all three datasets.</abstract>
<identifier type="citekey">khattab-etal-2021-relevance</identifier>
<identifier type="doi">10.1162/tacl_a_00405</identifier>
<location>
<url>https://aclanthology.org/2021.tacl-1.55</url>
</location>
<part>
<date>2021</date>
<detail type="volume"><number>9</number></detail>
<extent unit="page">
<start>929</start>
<end>944</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Relevance-guided Supervision for OpenQA with ColBERT
%A Khattab, Omar
%A Potts, Christopher
%A Zaharia, Matei
%J Transactions of the Association for Computational Linguistics
%D 2021
%V 9
%I MIT Press
%C Cambridge, MA
%F khattab-etal-2021-relevance
%X Systems for Open-Domain Question Answering (OpenQA) generally depend on a retriever for finding candidate passages in a large corpus and a reader for extracting answers from those passages. In much recent work, the retriever is a learned component that uses coarse-grained vector representations of questions and passages. We argue that this modeling choice is insufficiently expressive for dealing with the complexity of natural language questions. To address this, we define ColBERT-QA, which adapts the scalable neural retrieval model ColBERT to OpenQA. ColBERT creates fine-grained interactions between questions and passages. We propose an efficient weak supervision strategy that iteratively uses ColBERT to create its own training data. This greatly improves OpenQA retrieval on Natural Questions, SQuAD, and TriviaQA, and the resulting system attains state-of-the-art extractive OpenQA performance on all three datasets.
%R 10.1162/tacl_a_00405
%U https://aclanthology.org/2021.tacl-1.55
%U https://doi.org/10.1162/tacl_a_00405
%P 929-944
Markdown (Informal)
[Relevance-guided Supervision for OpenQA with ColBERT](https://aclanthology.org/2021.tacl-1.55) (Khattab et al., TACL 2021)
ACL
Omar Khattab, Christopher Potts, and Matei Zaharia. 2021. Relevance-guided Supervision for OpenQA with ColBERT. Transactions of the Association for Computational Linguistics, 9:929–944.
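
The abstract's "fine-grained interactions between questions and passages" refers to ColBERT's token-level late interaction, where each question token is compared against every passage token and only its best match contributes to the score. Below is a minimal, hedged sketch of that MaxSim scoring; it is not the authors' implementation, and the function names, embedding shapes, and use of NumPy are illustrative assumptions (the iterative relevance-guided supervision loop described in the abstract is not shown).

# Minimal sketch (not the paper's code) of ColBERT-style late-interaction scoring.
# Assumes question/passage token embeddings are already computed and L2-normalized;
# all names and shapes here are illustrative.
import numpy as np

def maxsim_score(question_emb: np.ndarray, passage_emb: np.ndarray) -> float:
    """Sum over question tokens of the max similarity to any passage token.

    question_emb: (num_q_tokens, dim), rows L2-normalized
    passage_emb:  (num_p_tokens, dim), rows L2-normalized
    """
    # (num_q_tokens, num_p_tokens) matrix of token-level cosine similarities.
    sims = question_emb @ passage_emb.T
    # Each question token keeps only its best-matching passage token (MaxSim);
    # the per-token maxima are then summed into a single relevance score.
    return float(sims.max(axis=1).sum())

def rank_passages(question_emb, passage_embs):
    """Rank candidate passages by MaxSim score, highest first."""
    scored = [(maxsim_score(question_emb, p), i) for i, p in enumerate(passage_embs)]
    return sorted(scored, reverse=True)

if __name__ == "__main__":
    rng = np.random.default_rng(0)

    def normed(shape):
        x = rng.standard_normal(shape)
        return x / np.linalg.norm(x, axis=1, keepdims=True)

    q = normed((8, 128))                      # 8 question tokens, 128-dim embeddings
    passages = [normed((60, 128)) for _ in range(5)]
    print(rank_passages(q, passages))         # [(score, passage_index), ...]

Because passage token embeddings can be precomputed and indexed, this per-token max-then-sum scoring stays cheap at corpus scale, which is what the abstract means by adapting a "scalable neural retrieval model" to OpenQA.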