@inproceedings{rana-etal-2023-weakly,
title = "Weakly supervised hierarchical multi-task classification of customer questions",
author = "Rana, Jitenkumar and
Yenigalla, Promod and
Aggarwal, Chetan and
Mukku, Sandeep Sricharan and
Soni, Manan and
Patange, Rashmi",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.75",
doi = "10.18653/v1/2023.acl-industry.75",
pages = "786--793",
abstract = "Identifying granular and actionable topics from customer questions (CQ) posted on e-commerce websites helps surface the missing information expected by customers on the product detail page (DP), provide insights to brands and sellers on what critical product information that the customers are looking before making a purchase decision and helps enrich the catalog quality to improve the overall customer experience (CX). We propose a weakly supervised Hierarchical Multi-task Classification Framework (HMCF) to identify topics from customer questions at various granularities. Complexity lies in creating a list of granular topics (taxonomy) for 1000s of product categories and building a scalable classification system. To this end, we introduce a clustering based Taxonomy Creation and Data Labeling (TCDL) module for creating taxonomy and labelled data with minimal supervision. Using TCDL module, taxonomy and labelled data creation task reduces to 2 hours as compared to 2 weeks of manual efforts by a subject matter expert. For classification, we propose a two level HMCF that performs multi-class classification to identify coarse level-1 topic and leverages NLI based label-aware approach to identify granular level-2 topic. We showcase that HMCF (based on BERT and NLI) a) achieves absolute improvement of 13{\%} in Top-1 accuracy over single-task non-hierarchical baselines b) learns a generic domain invariant function that can adapt to constantly evolving taxonomy (open label set) without need of re-training. c) reduces model deployment efforts significantly since it needs only one model that caters to 1000s of product categories.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rana-etal-2023-weakly">
<titleInfo>
<title>Weakly supervised hierarchical multi-task classification of customer questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jitenkumar</namePart>
<namePart type="family">Rana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Promod</namePart>
<namePart type="family">Yenigalla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chetan</namePart>
<namePart type="family">Aggarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandeep</namePart>
<namePart type="given">Sricharan</namePart>
<namePart type="family">Mukku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manan</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Patange</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Identifying granular and actionable topics from customer questions (CQ) posted on e-commerce websites helps surface the missing information expected by customers on the product detail page (DP), provide insights to brands and sellers on what critical product information that the customers are looking before making a purchase decision and helps enrich the catalog quality to improve the overall customer experience (CX). We propose a weakly supervised Hierarchical Multi-task Classification Framework (HMCF) to identify topics from customer questions at various granularities. Complexity lies in creating a list of granular topics (taxonomy) for 1000s of product categories and building a scalable classification system. To this end, we introduce a clustering based Taxonomy Creation and Data Labeling (TCDL) module for creating taxonomy and labelled data with minimal supervision. Using TCDL module, taxonomy and labelled data creation task reduces to 2 hours as compared to 2 weeks of manual efforts by a subject matter expert. For classification, we propose a two level HMCF that performs multi-class classification to identify coarse level-1 topic and leverages NLI based label-aware approach to identify granular level-2 topic. We showcase that HMCF (based on BERT and NLI) a) achieves absolute improvement of 13% in Top-1 accuracy over single-task non-hierarchical baselines b) learns a generic domain invariant function that can adapt to constantly evolving taxonomy (open label set) without need of re-training. c) reduces model deployment efforts significantly since it needs only one model that caters to 1000s of product categories.</abstract>
<identifier type="citekey">rana-etal-2023-weakly</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.75</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.75</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>786</start>
<end>793</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Weakly supervised hierarchical multi-task classification of customer questions
%A Rana, Jitenkumar
%A Yenigalla, Promod
%A Aggarwal, Chetan
%A Mukku, Sandeep Sricharan
%A Soni, Manan
%A Patange, Rashmi
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F rana-etal-2023-weakly
%X Identifying granular and actionable topics from customer questions (CQ) posted on e-commerce websites helps surface the missing information expected by customers on the product detail page (DP), provide insights to brands and sellers on what critical product information that the customers are looking before making a purchase decision and helps enrich the catalog quality to improve the overall customer experience (CX). We propose a weakly supervised Hierarchical Multi-task Classification Framework (HMCF) to identify topics from customer questions at various granularities. Complexity lies in creating a list of granular topics (taxonomy) for 1000s of product categories and building a scalable classification system. To this end, we introduce a clustering based Taxonomy Creation and Data Labeling (TCDL) module for creating taxonomy and labelled data with minimal supervision. Using TCDL module, taxonomy and labelled data creation task reduces to 2 hours as compared to 2 weeks of manual efforts by a subject matter expert. For classification, we propose a two level HMCF that performs multi-class classification to identify coarse level-1 topic and leverages NLI based label-aware approach to identify granular level-2 topic. We showcase that HMCF (based on BERT and NLI) a) achieves absolute improvement of 13% in Top-1 accuracy over single-task non-hierarchical baselines b) learns a generic domain invariant function that can adapt to constantly evolving taxonomy (open label set) without need of re-training. c) reduces model deployment efforts significantly since it needs only one model that caters to 1000s of product categories.
%R 10.18653/v1/2023.acl-industry.75
%U https://aclanthology.org/2023.acl-industry.75
%U https://doi.org/10.18653/v1/2023.acl-industry.75
%P 786-793
Markdown (Informal)
[Weakly supervised hierarchical multi-task classification of customer questions](https://aclanthology.org/2023.acl-industry.75) (Rana et al., ACL 2023)
ACL