% BibTeX entry for the NAACL 2022 Industry Track paper (ACL Anthology ID 2022.naacl-industry.19).
% The whole word E-commerce is brace-protected in the title so that sentence-casing
% styles keep the capital E without splitting the word with a mid-word brace group.
@inproceedings{joshi-etal-2022-augmenting,
  title     = {Augmenting Training Data for Massive Semantic Matching Models in Low-Traffic {E-commerce} Stores},
  author    = {Joshi, Ashutosh and
               Vishwanath, Shankar and
               Teo, Choon and
               Petricek, Vaclav and
               Vishwanathan, Vishy and
               Bhagat, Rahul and
               May, Jonathan},
  editor    = {Loukina, Anastassia and
               Gangadharaiah, Rashmi and
               Min, Bonan},
  booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track},
  month     = jul,
  year      = {2022},
  address   = {Hybrid: Seattle, Washington + Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.naacl-industry.19},
  doi       = {10.18653/v1/2022.naacl-industry.19},
  pages     = {160--167},
  abstract  = {Extreme multi-label classification (XMC) systems have been successfully applied in e-commerce (Shen et al., 2020; Dahiya et al., 2021) for retrieving products based on customer behavior. Such systems require large amounts of customer behavior data (e.g. queries, clicks, purchases) for training. However, behavioral data is limited in low-traffic e-commerce stores, impacting performance of these systems. In this paper, we present a technique that augments behavioral training data via query reformulation. We use the Aggregated Label eXtreme Multi-label Classification (AL-XMC) system (Shen et al., 2020) as an example semantic matching model and show via crowd-sourced human judgments that, when the training data is augmented through query reformulations, the quality of AL-XMC improves over a baseline that does not use query reformulation. We also show in online A/B tests that our method significantly improves business metrics for the AL-XMC model.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID joshi-etal-2022-augmenting. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="joshi-etal-2022-augmenting">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Augmenting Training Data for Massive Semantic Matching Models in Low-Traffic E-commerce Stores</title>
    </titleInfo>
    <!-- Authors, one <name> element each, in listed order. -->
    <name type="personal">
      <namePart type="given">Ashutosh</namePart>
      <namePart type="family">Joshi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shankar</namePart>
      <namePart type="family">Vishwanath</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Choon</namePart>
      <namePart type="family">Teo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vaclav</namePart>
      <namePart type="family">Petricek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vishy</namePart>
      <namePart type="family">Vishwanathan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rahul</namePart>
      <namePart type="family">Bhagat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jonathan</namePart>
      <namePart type="family">May</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anastassia</namePart>
        <namePart type="family">Loukina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rashmi</namePart>
        <namePart type="family">Gangadharaiah</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bonan</namePart>
        <namePart type="family">Min</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Extreme multi-label classification (XMC) systems have been successfully applied in e-commerce (Shen et al., 2020; Dahiya et al., 2021) for retrieving products based on customer behavior. Such systems require large amounts of customer behavior data (e.g. queries, clicks, purchases) for training. However, behavioral data is limited in low-traffic e-commerce stores, impacting performance of these systems. In this paper, we present a technique that augments behavioral training data via query reformulation. We use the Aggregated Label eXtreme Multi-label Classification (AL-XMC) system (Shen et al., 2020) as an example semantic matching model and show via crowd-sourced human judgments that, when the training data is augmented through query reformulations, the quality of AL-XMC improves over a baseline that does not use query reformulation. We also show in online A/B tests that our method significantly improves business metrics for the AL-XMC model.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">joshi-etal-2022-augmenting</identifier>
    <identifier type="doi">10.18653/v1/2022.naacl-industry.19</identifier>
    <location>
      <url>https://aclanthology.org/2022.naacl-industry.19</url>
    </location>
    <!-- Date and page extent of this part. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>160</start>
        <end>167</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Augmenting Training Data for Massive Semantic Matching Models in Low-Traffic E-commerce Stores
%A Joshi, Ashutosh
%A Vishwanath, Shankar
%A Teo, Choon
%A Petricek, Vaclav
%A Vishwanathan, Vishy
%A Bhagat, Rahul
%A May, Jonathan
%Y Loukina, Anastassia
%Y Gangadharaiah, Rashmi
%Y Min, Bonan
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F joshi-etal-2022-augmenting
%X Extreme multi-label classification (XMC) systems have been successfully applied in e-commerce (Shen et al., 2020; Dahiya et al., 2021) for retrieving products based on customer behavior. Such systems require large amounts of customer behavior data (e.g. queries, clicks, purchases) for training. However, behavioral data is limited in low-traffic e-commerce stores, impacting performance of these systems. In this paper, we present a technique that augments behavioral training data via query reformulation. We use the Aggregated Label eXtreme Multi-label Classification (AL-XMC) system (Shen et al., 2020) as an example semantic matching model and show via crowd-sourced human judgments that, when the training data is augmented through query reformulations, the quality of AL-XMC improves over a baseline that does not use query reformulation. We also show in online A/B tests that our method significantly improves business metrics for the AL-XMC model.
%R 10.18653/v1/2022.naacl-industry.19
%U https://aclanthology.org/2022.naacl-industry.19
%U https://doi.org/10.18653/v1/2022.naacl-industry.19
%P 160-167
Markdown (Informal)
[Augmenting Training Data for Massive Semantic Matching Models in Low-Traffic E-commerce Stores](https://aclanthology.org/2022.naacl-industry.19) (Joshi et al., NAACL 2022)
ACL
- Ashutosh Joshi, Shankar Vishwanath, Choon Teo, Vaclav Petricek, Vishy Vishwanathan, Rahul Bhagat, and Jonathan May. 2022. Augmenting Training Data for Massive Semantic Matching Models in Low-Traffic E-commerce Stores. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track, pages 160–167, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.