@inproceedings{cho-etal-2022-query,
title = "Query Generation with External Knowledge for Dense Retrieval",
author = "Cho, Sukmin and
Jeong, Soyeong and
Yang, Wonsuk and
Park, Jong",
editor = "Agirre, Eneko and
Apidianaki, Marianna and
Vuli{\'c}, Ivan",
booktitle = "Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures",
month = may,
year = "2022",
address = "Dublin, Ireland and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.deelio-1.3",
doi = "10.18653/v1/2022.deelio-1.3",
pages = "22--32",
abstract = "Dense retrieval aims at searching for the most relevant documents to the given query by encoding texts in the embedding space, requiring a large amount of query-document pairs to train. Since manually constructing such training data is challenging, recent work has proposed to generate synthetic queries from documents and use them to train a dense retriever. However, compared to the manually composed queries, synthetic queries do not generally ask for implicit information, therefore leading to a degraded retrieval performance. In this work, we propose Query Generation with External Knowledge (QGEK), a novel method for generating queries with external information related to the corresponding document. Specifically, we convert a query into a triplet-based template form to accommodate external information and transmit it to a pre-trained language model (PLM). We validate QGEK on both in-domain and out-domain dense retrieval settings. The dense retriever with the queries requiring implicit information is found to make good performance improvement. Also, such queries are similar to manually composed queries, confirmed by both human evaluation and unique {\&} non-unique words distribution.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2022-query">
<titleInfo>
<title>Query Generation with External Knowledge for Dense Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sukmin</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soyeong</namePart>
<namePart type="family">Jeong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wonsuk</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jong</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eneko</namePart>
<namePart type="family">Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dense retrieval aims at searching for the most relevant documents to the given query by encoding texts in the embedding space, requiring a large amount of query-document pairs to train. Since manually constructing such training data is challenging, recent work has proposed to generate synthetic queries from documents and use them to train a dense retriever. However, compared to the manually composed queries, synthetic queries do not generally ask for implicit information, therefore leading to a degraded retrieval performance. In this work, we propose Query Generation with External Knowledge (QGEK), a novel method for generating queries with external information related to the corresponding document. Specifically, we convert a query into a triplet-based template form to accommodate external information and transmit it to a pre-trained language model (PLM). We validate QGEK on both in-domain and out-domain dense retrieval settings. The dense retriever with the queries requiring implicit information is found to make good performance improvement. Also, such queries are similar to manually composed queries, confirmed by both human evaluation and unique & non-unique words distribution.</abstract>
<identifier type="citekey">cho-etal-2022-query</identifier>
<identifier type="doi">10.18653/v1/2022.deelio-1.3</identifier>
<location>
<url>https://aclanthology.org/2022.deelio-1.3</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>22</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Query Generation with External Knowledge for Dense Retrieval
%A Cho, Sukmin
%A Jeong, Soyeong
%A Yang, Wonsuk
%A Park, Jong
%Y Agirre, Eneko
%Y Apidianaki, Marianna
%Y Vulić, Ivan
%S Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland and Online
%F cho-etal-2022-query
%X Dense retrieval aims at searching for the most relevant documents to the given query by encoding texts in the embedding space, requiring a large amount of query-document pairs to train. Since manually constructing such training data is challenging, recent work has proposed to generate synthetic queries from documents and use them to train a dense retriever. However, compared to the manually composed queries, synthetic queries do not generally ask for implicit information, therefore leading to a degraded retrieval performance. In this work, we propose Query Generation with External Knowledge (QGEK), a novel method for generating queries with external information related to the corresponding document. Specifically, we convert a query into a triplet-based template form to accommodate external information and transmit it to a pre-trained language model (PLM). We validate QGEK on both in-domain and out-domain dense retrieval settings. The dense retriever with the queries requiring implicit information is found to make good performance improvement. Also, such queries are similar to manually composed queries, confirmed by both human evaluation and unique & non-unique words distribution.
%R 10.18653/v1/2022.deelio-1.3
%U https://aclanthology.org/2022.deelio-1.3
%U https://doi.org/10.18653/v1/2022.deelio-1.3
%P 22-32
Markdown (Informal)
[Query Generation with External Knowledge for Dense Retrieval](https://aclanthology.org/2022.deelio-1.3) (Cho et al., DeeLIO 2022)
ACL
- Sukmin Cho, Soyeong Jeong, Wonsuk Yang, and Jong Park. 2022. Query Generation with External Knowledge for Dense Retrieval. In Proceedings of Deep Learning Inside Out (DeeLIO 2022): The 3rd Workshop on Knowledge Extraction and Integration for Deep Learning Architectures, pages 22–32, Dublin, Ireland and Online. Association for Computational Linguistics.