@inproceedings{nishikawa-etal-2025-search,
title = "Search Query Embeddings via User-behavior-driven Contrastive Learning",
author = "Nishikawa, Sosuke and
Hirako, Jun and
Kaji, Nobuhiro and
Watanabe, Koki and
Asano, Hiroki and
Yamashiro, Souta and
Sano, Shumpei",
editor = "Chen, Weizhu and
Yang, Yi and
Kachuee, Mohammad and
Fu, Xue-Yong",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-industry.12/",
doi = "10.18653/v1/2025.naacl-industry.12",
pages = "138--147",
ISBN = "979-8-89176-194-0",
abstract = "Universal query embeddings that accurately capture the semantic meaning of search queries are crucial for supporting a range of query understanding (QU) tasks within enterprises. However, current embedding approaches often struggle to effectively represent queries due to the shortness of search queries and their tendency for surface-level variations. We propose a user-behavior-driven contrastive learning approach which directly aligns embeddings according to user intent. This approach uses intent-aligned query pairs as positive examples, derived from two types of real-world user interactions: (1) clickthrough data, in which queries leading to clicks on the same URLs are assumed to share the same intent, and (2) session data, in which queries within the same user session are considered to share intent. By incorporating these query pairs into a robust contrastive learning framework, we can construct query embedding models that align with user intent while minimizing reliance on surface-level lexical similarities. Evaluations on real-world QU tasks demonstrated that these models substantially outperformed state-of-the-art text embedding models such as mE5 and SimCSE. Our models have been deployed in our search engine to support QU technologies."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nishikawa-etal-2025-search">
<titleInfo>
<title>Search Query Embeddings via User-behavior-driven Contrastive Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sosuke</namePart>
<namePart type="family">Nishikawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Hirako</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nobuhiro</namePart>
<namePart type="family">Kaji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koki</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroki</namePart>
<namePart type="family">Asano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Souta</namePart>
<namePart type="family">Yamashiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shumpei</namePart>
<namePart type="family">Sano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weizhu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Kachuee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue-Yong</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-194-0</identifier>
</relatedItem>
<abstract>Universal query embeddings that accurately capture the semantic meaning of search queries are crucial for supporting a range of query understanding (QU) tasks within enterprises. However, current embedding approaches often struggle to effectively represent queries due to the shortness of search queries and their tendency for surface-level variations. We propose a user-behavior-driven contrastive learning approach which directly aligns embeddings according to user intent. This approach uses intent-aligned query pairs as positive examples, derived from two types of real-world user interactions: (1) clickthrough data, in which queries leading to clicks on the same URLs are assumed to share the same intent, and (2) session data, in which queries within the same user session are considered to share intent. By incorporating these query pairs into a robust contrastive learning framework, we can construct query embedding models that align with user intent while minimizing reliance on surface-level lexical similarities. Evaluations on real-world QU tasks demonstrated that these models substantially outperformed state-of-the-art text embedding models such as mE5 and SimCSE. Our models have been deployed in our search engine to support QU technologies.</abstract>
<identifier type="citekey">nishikawa-etal-2025-search</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-industry.12</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-industry.12/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>138</start>
<end>147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Search Query Embeddings via User-behavior-driven Contrastive Learning
%A Nishikawa, Sosuke
%A Hirako, Jun
%A Kaji, Nobuhiro
%A Watanabe, Koki
%A Asano, Hiroki
%A Yamashiro, Souta
%A Sano, Shumpei
%Y Chen, Weizhu
%Y Yang, Yi
%Y Kachuee, Mohammad
%Y Fu, Xue-Yong
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-194-0
%F nishikawa-etal-2025-search
%X Universal query embeddings that accurately capture the semantic meaning of search queries are crucial for supporting a range of query understanding (QU) tasks within enterprises. However, current embedding approaches often struggle to effectively represent queries due to the shortness of search queries and their tendency for surface-level variations. We propose a user-behavior-driven contrastive learning approach which directly aligns embeddings according to user intent. This approach uses intent-aligned query pairs as positive examples, derived from two types of real-world user interactions: (1) clickthrough data, in which queries leading to clicks on the same URLs are assumed to share the same intent, and (2) session data, in which queries within the same user session are considered to share intent. By incorporating these query pairs into a robust contrastive learning framework, we can construct query embedding models that align with user intent while minimizing reliance on surface-level lexical similarities. Evaluations on real-world QU tasks demonstrated that these models substantially outperformed state-of-the-art text embedding models such as mE5 and SimCSE. Our models have been deployed in our search engine to support QU technologies.
%R 10.18653/v1/2025.naacl-industry.12
%U https://aclanthology.org/2025.naacl-industry.12/
%U https://doi.org/10.18653/v1/2025.naacl-industry.12
%P 138-147
Markdown (Informal)
[Search Query Embeddings via User-behavior-driven Contrastive Learning](https://aclanthology.org/2025.naacl-industry.12/) (Nishikawa et al., NAACL 2025)
ACL
- Sosuke Nishikawa, Jun Hirako, Nobuhiro Kaji, Koki Watanabe, Hiroki Asano, Souta Yamashiro, and Shumpei Sano. 2025. Search Query Embeddings via User-behavior-driven Contrastive Learning. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: Industry Track), pages 138–147, Albuquerque, New Mexico. Association for Computational Linguistics.