BibTeX
@inproceedings{nayak-garera-2022-deploying,
title = "Deploying Unified {BERT} Moderation Model for {E}-Commerce Reviews",
author = "Nayak, Ravindra and
Garera, Nikesh",
editor = "Li, Yunyao and
Lazaridou, Angeliki",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-industry.55",
doi = "10.18653/v1/2022.emnlp-industry.55",
pages = "540--547",
abstract = "Moderation of user-generated e-commerce content has become crucial due to the large and diverse user base on the platforms. Product reviews and ratings have become an integral part of the shopping experience to build trust among users. Due to the high volume of reviews generated on a vast catalog of products, manual moderation is infeasible, making machine moderation a necessity. In this work, we described our deployed system and models for automated moderation of user-generated content. At the heart of our approach, we outline several rejection reasons for review {\&} rating moderation and explore a unified BERT model to moderate them. We convey the importance of product vertical embeddings for the relevancy of the review for a given product and highlight the advantages of pre-training the BERT models with monolingual data to cope with the domain gap in the absence of huge labelled datasets. We observe a 4.78{\%} F1 increase with less labelled data and a 2.57{\%} increase in F1 score on the review data compared to the publicly available BERT-based models. Our best model In-House-BERT-vertical sends only 5.89{\%} of total reviews to manual moderation and has been deployed in production serving live traffic for millions of users.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nayak-garera-2022-deploying">
  <titleInfo>
    <title>Deploying Unified BERT Moderation Model for E-Commerce Reviews</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Ravindra</namePart>
    <namePart type="family">Nayak</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Nikesh</namePart>
    <namePart type="family">Garera</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2022-12</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yunyao</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angeliki</namePart>
      <namePart type="family">Lazaridou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Moderation of user-generated e-commerce content has become crucial due to the large and diverse user base on the platforms. Product reviews and ratings have become an integral part of the shopping experience to build trust among users. Due to the high volume of reviews generated on a vast catalog of products, manual moderation is infeasible, making machine moderation a necessity. In this work, we describe our deployed system and models for automated moderation of user-generated content. At the heart of our approach, we outline several rejection reasons for review &amp; rating moderation and explore a unified BERT model to moderate them. We convey the importance of product vertical embeddings for the relevance of a review to a given product and highlight the advantages of pre-training the BERT models with monolingual data to cope with the domain gap in the absence of huge labelled datasets. We observe a 4.78% F1 increase with less labelled data and a 2.57% increase in F1 score on the review data compared to publicly available BERT-based models. Our best model, In-House-BERT-vertical, sends only 5.89% of total reviews to manual moderation and has been deployed in production serving live traffic for millions of users.</abstract>
<identifier type="citekey">nayak-garera-2022-deploying</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-industry.55</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-industry.55</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>540</start>
<end>547</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Deploying Unified BERT Moderation Model for E-Commerce Reviews
%A Nayak, Ravindra
%A Garera, Nikesh
%Y Li, Yunyao
%Y Lazaridou, Angeliki
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F nayak-garera-2022-deploying
%X Moderation of user-generated e-commerce content has become crucial due to the large and diverse user base on the platforms. Product reviews and ratings have become an integral part of the shopping experience to build trust among users. Due to the high volume of reviews generated on a vast catalog of products, manual moderation is infeasible, making machine moderation a necessity. In this work, we describe our deployed system and models for automated moderation of user-generated content. At the heart of our approach, we outline several rejection reasons for review & rating moderation and explore a unified BERT model to moderate them. We convey the importance of product vertical embeddings for the relevance of a review to a given product and highlight the advantages of pre-training the BERT models with monolingual data to cope with the domain gap in the absence of huge labelled datasets. We observe a 4.78% F1 increase with less labelled data and a 2.57% increase in F1 score on the review data compared to publicly available BERT-based models. Our best model, In-House-BERT-vertical, sends only 5.89% of total reviews to manual moderation and has been deployed in production serving live traffic for millions of users.
%R 10.18653/v1/2022.emnlp-industry.55
%U https://aclanthology.org/2022.emnlp-industry.55
%U https://doi.org/10.18653/v1/2022.emnlp-industry.55
%P 540-547
Markdown (Informal)
[Deploying Unified BERT Moderation Model for E-Commerce Reviews](https://aclanthology.org/2022.emnlp-industry.55) (Nayak & Garera, EMNLP 2022)
ACL
Ravindra Nayak and Nikesh Garera. 2022. Deploying Unified BERT Moderation Model for E-Commerce Reviews. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 540–547, Abu Dhabi, UAE. Association for Computational Linguistics.