@inproceedings{shamanna-girishekar-etal-2021-training,
    title     = {Training Language Models under Resource Constraints for Adversarial Advertisement Detection},
    author    = {Shamanna Girishekar, Eshwar and
                 Surya, Shiv and
                 Nikhil, Nishant and
                 Sil, Dyut Kumar and
                 Negi, Sumit and
                 Rajan, Aruna},
    editor    = {Kim, Young-bum and
                 Li, Yunyao and
                 Rambow, Owen},
    booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
    month     = jun,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.naacl-industry.35},
    doi       = {10.18653/v1/2021.naacl-industry.35},
    pages     = {280--287},
    abstract  = {Advertising on e-commerce and social media sites deliver ad impressions at web scale on a daily basis driving value to both shoppers and advertisers. This scale necessitates programmatic ways of detecting unsuitable content in ads to safeguard customer experience and trust. This paper focusses on techniques for training text classification models under resource constraints, built as part of automated solutions for advertising content moderation. We show how weak supervision, curriculum learning and multi-lingual training can be applied effectively to fine-tune BERT and its variants for text classification tasks in conjunction with different data augmentation strategies. Our extensive experiments on multiple languages show that these techniques detect adversarial ad categories with a substantial gain in precision at high recall threshold over the baseline.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shamanna-girishekar-etal-2021-training">
<titleInfo>
<title>Training Language Models under Resource Constraints for Adversarial Advertisement Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eshwar</namePart>
<namePart type="family">Shamanna Girishekar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiv</namePart>
<namePart type="family">Surya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nishant</namePart>
<namePart type="family">Nikhil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dyut</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Sil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumit</namePart>
<namePart type="family">Negi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aruna</namePart>
<namePart type="family">Rajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Young-bum</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Advertising on e-commerce and social media sites deliver ad impressions at web scale on a daily basis driving value to both shoppers and advertisers. This scale necessitates programmatic ways of detecting unsuitable content in ads to safeguard customer experience and trust. This paper focusses on techniques for training text classification models under resource constraints, built as part of automated solutions for advertising content moderation. We show how weak supervision, curriculum learning and multi-lingual training can be applied effectively to fine-tune BERT and its variants for text classification tasks in conjunction with different data augmentation strategies. Our extensive experiments on multiple languages show that these techniques detect adversarial ad categories with a substantial gain in precision at high recall threshold over the baseline.</abstract>
<identifier type="citekey">shamanna-girishekar-etal-2021-training</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-industry.35</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-industry.35</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>280</start>
<end>287</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Training Language Models under Resource Constraints for Adversarial Advertisement Detection
%A Shamanna Girishekar, Eshwar
%A Surya, Shiv
%A Nikhil, Nishant
%A Sil, Dyut Kumar
%A Negi, Sumit
%A Rajan, Aruna
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F shamanna-girishekar-etal-2021-training
%X Advertising on e-commerce and social media sites deliver ad impressions at web scale on a daily basis driving value to both shoppers and advertisers. This scale necessitates programmatic ways of detecting unsuitable content in ads to safeguard customer experience and trust. This paper focusses on techniques for training text classification models under resource constraints, built as part of automated solutions for advertising content moderation. We show how weak supervision, curriculum learning and multi-lingual training can be applied effectively to fine-tune BERT and its variants for text classification tasks in conjunction with different data augmentation strategies. Our extensive experiments on multiple languages show that these techniques detect adversarial ad categories with a substantial gain in precision at high recall threshold over the baseline.
%R 10.18653/v1/2021.naacl-industry.35
%U https://aclanthology.org/2021.naacl-industry.35
%U https://doi.org/10.18653/v1/2021.naacl-industry.35
%P 280-287
Markdown (Informal)
[Training Language Models under Resource Constraints for Adversarial Advertisement Detection](https://aclanthology.org/2021.naacl-industry.35) (Shamanna Girishekar et al., NAACL 2021)
ACL