@inproceedings{yi-etal-2022-effective,
title = "Effective and Efficient Query-aware Snippet Extraction for Web Search",
author = "Yi, Jingwei and
Wu, Fangzhao and
Wu, Chuhan and
Huang, Xiaolong and
Jiao, Binxing and
Sun, Guangzhong and
Xie, Xing",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.197",
doi = "10.18653/v1/2022.emnlp-main.197",
pages = "3035--3046",
abstract = "Query-aware webpage snippet extraction is widely used in search engines to help users better understand the content of the returned webpages before clicking. The extracted snippet is expected to summarize the webpage in the context of the input query. Existing snippet extraction methods mainly rely on handcrafted features of overlapping words, which cannot capture deep semantic relationships between the query and webpages. Another idea is to extract the sentences which are most relevant to queries as snippets with existing text matching methods. However, these methods ignore the contextual information of webpages, which may be sub-optimal. In this paper, we propose an effective query-aware webpage snippet extraction method named DeepQSE. In DeepQSE, the concatenation of title, query and each candidate sentence serves as an input of query-aware sentence encoder, aiming to capture the fine-grained relevance between the query and sentences. Then, these query-aware sentence representations are modeled jointly through a document-aware relevance encoder to capture contextual information of the webpage. Since the query and each sentence are jointly modeled in DeepQSE, its online inference may be slow. Thus, we further propose an efficient version of DeepQSE, named Efficient-DeepQSE, which can significantly improve the inference speed of DeepQSE without affecting its performance. The core idea of Efficient-DeepQSE is to decompose the query-aware snippet extraction task into two stages, i.e., a coarse-grained candidate sentence selection stage where sentence representations can be cached, and a fine-grained relevance modeling stage. Experiments on two datasets validate the effectiveness and efficiency of our methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yi-etal-2022-effective">
<titleInfo>
<title>Effective and Efficient Query-aware Snippet Extraction for Web Search</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jingwei</namePart>
<namePart type="family">Yi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fangzhao</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuhan</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaolong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Binxing</namePart>
<namePart type="family">Jiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guangzhong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Query-aware webpage snippet extraction is widely used in search engines to help users better understand the content of the returned webpages before clicking. The extracted snippet is expected to summarize the webpage in the context of the input query. Existing snippet extraction methods mainly rely on handcrafted features of overlapping words, which cannot capture deep semantic relationships between the query and webpages. Another idea is to extract the sentences which are most relevant to queries as snippets with existing text matching methods. However, these methods ignore the contextual information of webpages, which may be sub-optimal. In this paper, we propose an effective query-aware webpage snippet extraction method named DeepQSE. In DeepQSE, the concatenation of title, query and each candidate sentence serves as an input of query-aware sentence encoder, aiming to capture the fine-grained relevance between the query and sentences. Then, these query-aware sentence representations are modeled jointly through a document-aware relevance encoder to capture contextual information of the webpage. Since the query and each sentence are jointly modeled in DeepQSE, its online inference may be slow. Thus, we further propose an efficient version of DeepQSE, named Efficient-DeepQSE, which can significantly improve the inference speed of DeepQSE without affecting its performance. The core idea of Efficient-DeepQSE is to decompose the query-aware snippet extraction task into two stages, i.e., a coarse-grained candidate sentence selection stage where sentence representations can be cached, and a fine-grained relevance modeling stage. Experiments on two datasets validate the effectiveness and efficiency of our methods.</abstract>
<identifier type="citekey">yi-etal-2022-effective</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.197</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.197</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>3035</start>
<end>3046</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Effective and Efficient Query-aware Snippet Extraction for Web Search
%A Yi, Jingwei
%A Wu, Fangzhao
%A Wu, Chuhan
%A Huang, Xiaolong
%A Jiao, Binxing
%A Sun, Guangzhong
%A Xie, Xing
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F yi-etal-2022-effective
%X Query-aware webpage snippet extraction is widely used in search engines to help users better understand the content of the returned webpages before clicking. The extracted snippet is expected to summarize the webpage in the context of the input query. Existing snippet extraction methods mainly rely on handcrafted features of overlapping words, which cannot capture deep semantic relationships between the query and webpages. Another idea is to extract the sentences which are most relevant to queries as snippets with existing text matching methods. However, these methods ignore the contextual information of webpages, which may be sub-optimal. In this paper, we propose an effective query-aware webpage snippet extraction method named DeepQSE. In DeepQSE, the concatenation of title, query and each candidate sentence serves as an input of query-aware sentence encoder, aiming to capture the fine-grained relevance between the query and sentences. Then, these query-aware sentence representations are modeled jointly through a document-aware relevance encoder to capture contextual information of the webpage. Since the query and each sentence are jointly modeled in DeepQSE, its online inference may be slow. Thus, we further propose an efficient version of DeepQSE, named Efficient-DeepQSE, which can significantly improve the inference speed of DeepQSE without affecting its performance. The core idea of Efficient-DeepQSE is to decompose the query-aware snippet extraction task into two stages, i.e., a coarse-grained candidate sentence selection stage where sentence representations can be cached, and a fine-grained relevance modeling stage. Experiments on two datasets validate the effectiveness and efficiency of our methods.
%R 10.18653/v1/2022.emnlp-main.197
%U https://aclanthology.org/2022.emnlp-main.197
%U https://doi.org/10.18653/v1/2022.emnlp-main.197
%P 3035-3046
Markdown (Informal)
[Effective and Efficient Query-aware Snippet Extraction for Web Search](https://aclanthology.org/2022.emnlp-main.197) (Yi et al., EMNLP 2022)
ACL
- Jingwei Yi, Fangzhao Wu, Chuhan Wu, Xiaolong Huang, Binxing Jiao, Guangzhong Sun, and Xing Xie. 2022. Effective and Efficient Query-aware Snippet Extraction for Web Search. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 3035–3046, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.