@inproceedings{zhu-etal-2025-query,
title = "Query-{LIFE}: Query-aware Language Image Fusion Embedding for {E}-Commerce Relevance",
author = "Zhu, Hai and
Guo, Yuankai and
Dou, Ronggang and
Liu, Kai",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-industry.2/",
pages = "21--28",
abstract = "Relevance module plays a fundamental role in e-commerce search as they are responsible for selecting relevant products from thousands of items based on user queries, thereby enhancing users experience and efficiency. The traditional method calculates the relevance score based on product titles and user queries, but the information in title alone maybe insufficient to describe the product completely. A more general method is to further leverage product image information. In recent years, vision-language pre-training model has achieved impressive results in many scenarios, which leverage contrastive learning to map both textual and visual features into a joint embedding space. In e-commerce, a common practice is to further fine-tune the model using e-commerce data on the basis of pre-trained model. However, the performance is sub-optimal because the vision-language pre-training models lack of alignment specifically designed for queries. In this paper, we propose Query-aware Language Image Fusion Embedding to address these challenges. Query-LIFE utilizes a query-based multimodal fusion to effectively incorporate the image and title based on the product types. Additionally, it employs query-aware modal alignment to enhance the accuracy of the comprehensive representation of products. Furthermore, we design GenFilt, which utilizes the generation capability of large models to filter out false negative samples and further improve the overall performance of the contrastive learning task in the model. Experiments have demonstrated that Query-LIFE outperforms existing baselines. We have conducted ablation studies and human evaluations to validate the effectiveness of each module within Query-LIFE. Moreover, Query-LIFE has been deployed on Miravia Search. resulting in improved both relevance and conversion efficiency."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2025-query">
<titleInfo>
<title>Query-LIFE: Query-aware Language Image Fusion Embedding for E-Commerce Relevance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hai</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuankai</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronggang</namePart>
<namePart type="family">Dou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apoorv</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Relevance module plays a fundamental role in e-commerce search as they are responsible for selecting relevant products from thousands of items based on user queries, thereby enhancing users experience and efficiency. The traditional method calculates the relevance score based on product titles and user queries, but the information in title alone maybe insufficient to describe the product completely. A more general method is to further leverage product image information. In recent years, vision-language pre-training model has achieved impressive results in many scenarios, which leverage contrastive learning to map both textual and visual features into a joint embedding space. In e-commerce, a common practice is to further fine-tune the model using e-commerce data on the basis of pre-trained model. However, the performance is sub-optimal because the vision-language pre-training models lack of alignment specifically designed for queries. In this paper, we propose Query-aware Language Image Fusion Embedding to address these challenges. Query-LIFE utilizes a query-based multimodal fusion to effectively incorporate the image and title based on the product types. Additionally, it employs query-aware modal alignment to enhance the accuracy of the comprehensive representation of products. Furthermore, we design GenFilt, which utilizes the generation capability of large models to filter out false negative samples and further improve the overall performance of the contrastive learning task in the model. Experiments have demonstrated that Query-LIFE outperforms existing baselines. We have conducted ablation studies and human evaluations to validate the effectiveness of each module within Query-LIFE. Moreover, Query-LIFE has been deployed on Miravia Search. resulting in improved both relevance and conversion efficiency.</abstract>
<identifier type="citekey">zhu-etal-2025-query</identifier>
<location>
<url>https://aclanthology.org/2025.coling-industry.2/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>21</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Query-LIFE: Query-aware Language Image Fusion Embedding for E-Commerce Relevance
%A Zhu, Hai
%A Guo, Yuankai
%A Dou, Ronggang
%A Liu, Kai
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%Y Darwish, Kareem
%Y Agarwal, Apoorv
%S Proceedings of the 31st International Conference on Computational Linguistics: Industry Track
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhu-etal-2025-query
%X Relevance module plays a fundamental role in e-commerce search as they are responsible for selecting relevant products from thousands of items based on user queries, thereby enhancing users experience and efficiency. The traditional method calculates the relevance score based on product titles and user queries, but the information in title alone maybe insufficient to describe the product completely. A more general method is to further leverage product image information. In recent years, vision-language pre-training model has achieved impressive results in many scenarios, which leverage contrastive learning to map both textual and visual features into a joint embedding space. In e-commerce, a common practice is to further fine-tune the model using e-commerce data on the basis of pre-trained model. However, the performance is sub-optimal because the vision-language pre-training models lack of alignment specifically designed for queries. In this paper, we propose Query-aware Language Image Fusion Embedding to address these challenges. Query-LIFE utilizes a query-based multimodal fusion to effectively incorporate the image and title based on the product types. Additionally, it employs query-aware modal alignment to enhance the accuracy of the comprehensive representation of products. Furthermore, we design GenFilt, which utilizes the generation capability of large models to filter out false negative samples and further improve the overall performance of the contrastive learning task in the model. Experiments have demonstrated that Query-LIFE outperforms existing baselines. We have conducted ablation studies and human evaluations to validate the effectiveness of each module within Query-LIFE. Moreover, Query-LIFE has been deployed on Miravia Search. resulting in improved both relevance and conversion efficiency.
%U https://aclanthology.org/2025.coling-industry.2/
%P 21-28
Markdown (Informal)
[Query-LIFE: Query-aware Language Image Fusion Embedding for E-Commerce Relevance](https://aclanthology.org/2025.coling-industry.2/) (Zhu et al., COLING 2025)
ACL