@inproceedings{zhang-etal-2025-leveraging-product,
title = "Leveraging Product Catalog Patterns for Multilingual {E}-commerce Product Attribute Prediction",
author = "Zhang, Bryan and
Khan, Suleiman A. and
Walter, Stephan",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.18/",
pages = "267--275",
ISBN = "979-8-89176-333-3",
abstract = "E-commerce stores increasingly use Large Language Models (LLMs) to enhance catalog data quality through automated regeneration. A critical challenge is accurately predicting missing structured attribute values across multilingual product catalogs, where LLM performance varies significantly by language. While existing approaches leverage general knowledge through prompt engineering and external retrieval, more effective and accurate signals for attribute prediction can exist within the catalog ecosystem itself---similar products often share consistent patterns and structural relationships, and may have the missing attributes filled. Therefore, this paper introduces PatternRAG, a novel retrieval-augmented system that strategically leverages existing product catalog entries to guide LLM predictions for missing attributes. Our approach introduces a multi-stage retrieval framework that progressively refines the search space based on product type, uses textual similarity, glance views and brand relationships to identify the most relevant attribute-filled examples for LLM prediction guidance. Experiments on test sets across three major e-commerce stores in different languages (US, DE, FR) demonstrate substantial improvements in catalog data quality, achieving up to 34{\%} increase in recall and 0.8{\%} in precision for attribute value prediction. At catalog entry level, it also achieves up to +43.32{\%} increase in completeness and up to +2.83{\%} in correctness."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-leveraging-product">
<titleInfo>
<title>Leveraging Product Catalog Patterns for Multilingual E-commerce Product Attribute Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bryan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suleiman</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephan</namePart>
<namePart type="family">Walter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>E-commerce stores increasingly use Large Language Models (LLMs) to enhance catalog data quality through automated regeneration. A critical challenge is accurately predicting missing structured attribute values across multilingual product catalogs, where LLM performance varies significantly by language. While existing approaches leverage general knowledge through prompt engineering and external retrieval, more effective and accurate signals for attribute prediction can exist within the catalog ecosystem itself—similar products often share consistent patterns and structural relationships, and may have the missing attributes filled. Therefore, this paper introduces PatternRAG, a novel retrieval-augmented system that strategically leverages existing product catalog entries to guide LLM predictions for missing attributes. Our approach introduces a multi-stage retrieval framework that progressively refines the search space based on product type, uses textual similarity, glance views and brand relationships to identify the most relevant attribute-filled examples for LLM prediction guidance. Experiments on test sets across three major e-commerce stores in different languages (US, DE, FR) demonstrate substantial improvements in catalog data quality, achieving up to 34% increase in recall and 0.8% in precision for attribute value prediction. At catalog entry level, it also achieves up to +43.32% increase in completeness and up to +2.83% in correctness.</abstract>
<identifier type="citekey">zhang-etal-2025-leveraging-product</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.18/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>267</start>
<end>275</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Product Catalog Patterns for Multilingual E-commerce Product Attribute Prediction
%A Zhang, Bryan
%A Khan, Suleiman A.
%A Walter, Stephan
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F zhang-etal-2025-leveraging-product
%X E-commerce stores increasingly use Large Language Models (LLMs) to enhance catalog data quality through automated regeneration. A critical challenge is accurately predicting missing structured attribute values across multilingual product catalogs, where LLM performance varies significantly by language. While existing approaches leverage general knowledge through prompt engineering and external retrieval, more effective and accurate signals for attribute prediction can exist within the catalog ecosystem itself—similar products often share consistent patterns and structural relationships, and may have the missing attributes filled. Therefore, this paper introduces PatternRAG, a novel retrieval-augmented system that strategically leverages existing product catalog entries to guide LLM predictions for missing attributes. Our approach introduces a multi-stage retrieval framework that progressively refines the search space based on product type, uses textual similarity, glance views and brand relationships to identify the most relevant attribute-filled examples for LLM prediction guidance. Experiments on test sets across three major e-commerce stores in different languages (US, DE, FR) demonstrate substantial improvements in catalog data quality, achieving up to 34% increase in recall and 0.8% in precision for attribute value prediction. At catalog entry level, it also achieves up to +43.32% increase in completeness and up to +2.83% in correctness.
%U https://aclanthology.org/2025.emnlp-industry.18/
%P 267-275
Markdown (Informal)
[Leveraging Product Catalog Patterns for Multilingual E-commerce Product Attribute Prediction](https://aclanthology.org/2025.emnlp-industry.18/) (Zhang et al., EMNLP 2025)
ACL