@inproceedings{lowell-etal-2021-unsupervised,
    title = "Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data",
    author = "Lowell, David and
      Howard, Brian and
      Lipton, Zachary C. and
      Wallace, Byron",
    booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.emnlp-main.408",
    doi = "10.18653/v1/2021.emnlp-main.408",
    pages = "4992--5001",
abstract = "Unsupervised Data Augmentation (UDA) is a semisupervised technique that applies a consistency loss to penalize differences between a model{'}s predictions on (a) observed (unlabeled) examples; and (b) corresponding {`}noised{'} examples produced via data augmentation. While UDA has gained popularity for text classification, open questions linger over which design decisions are necessary and how to extend the method to sequence labeling tasks. In this paper, we re-examine UDA and demonstrate its efficacy on several sequential tasks. Our main contribution is an empirical study of UDA to establish which components of the algorithm confer benefits in NLP. Notably, although prior work has emphasized the use of clever augmentation techniques including back-translation, we find that enforcing consistency between predictions assigned to observed and randomly substituted words often yields comparable (or greater) benefits compared to these more complex perturbation models. Furthermore, we find that applying UDA{'}s consistency loss affords meaningful gains without any unlabeled data at all, i.e., in a standard supervised setting. In short, UDA need not be unsupervised to realize much of its noted benefits, and does not require complex data augmentation to be effective.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lowell-etal-2021-unsupervised">
    <titleInfo>
      <title>Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Lowell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Brian</namePart>
      <namePart type="family">Howard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zachary</namePart>
      <namePart type="given">C</namePart>
      <namePart type="family">Lipton</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Byron</namePart>
      <namePart type="family">Wallace</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Unsupervised Data Augmentation (UDA) is a semi-supervised technique that applies a consistency loss to penalize differences between a model’s predictions on (a) observed (unlabeled) examples; and (b) corresponding ‘noised’ examples produced via data augmentation. While UDA has gained popularity for text classification, open questions linger over which design decisions are necessary and how to extend the method to sequence labeling tasks. In this paper, we re-examine UDA and demonstrate its efficacy on several sequential tasks. Our main contribution is an empirical study of UDA to establish which components of the algorithm confer benefits in NLP. Notably, although prior work has emphasized the use of clever augmentation techniques including back-translation, we find that enforcing consistency between predictions assigned to observed and randomly substituted words often yields comparable (or greater) benefits relative to these more complex perturbation models. Furthermore, we find that applying UDA’s consistency loss affords meaningful gains without any unlabeled data at all, i.e., in a standard supervised setting. In short, UDA need not be unsupervised to realize many of its noted benefits, and does not require complex data augmentation to be effective.</abstract>
<identifier type="citekey">lowell-etal-2021-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.408</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.408</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>4992</start>
<end>5001</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data
%A Lowell, David
%A Howard, Brian
%A Lipton, Zachary C.
%A Wallace, Byron
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F lowell-etal-2021-unsupervised
%X Unsupervised Data Augmentation (UDA) is a semi-supervised technique that applies a consistency loss to penalize differences between a model’s predictions on (a) observed (unlabeled) examples; and (b) corresponding ‘noised’ examples produced via data augmentation. While UDA has gained popularity for text classification, open questions linger over which design decisions are necessary and how to extend the method to sequence labeling tasks. In this paper, we re-examine UDA and demonstrate its efficacy on several sequential tasks. Our main contribution is an empirical study of UDA to establish which components of the algorithm confer benefits in NLP. Notably, although prior work has emphasized the use of clever augmentation techniques including back-translation, we find that enforcing consistency between predictions assigned to observed and randomly substituted words often yields comparable (or greater) benefits relative to these more complex perturbation models. Furthermore, we find that applying UDA’s consistency loss affords meaningful gains without any unlabeled data at all, i.e., in a standard supervised setting. In short, UDA need not be unsupervised to realize many of its noted benefits, and does not require complex data augmentation to be effective.
%R 10.18653/v1/2021.emnlp-main.408
%U https://aclanthology.org/2021.emnlp-main.408
%U https://doi.org/10.18653/v1/2021.emnlp-main.408
%P 4992-5001

Markdown (Informal)
[Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data](https://aclanthology.org/2021.emnlp-main.408) (Lowell et al., EMNLP 2021)

ACL
David Lowell, Brian Howard, Zachary C. Lipton, and Byron Wallace. 2021. Unsupervised Data Augmentation with Naive Augmentation and without Unlabeled Data. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 4992–5001, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.
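
Note: the abstract above describes UDA's core mechanism, a consistency loss between predictions on an observed input and on a "noised" copy produced by naive augmentation (random word substitution). The following is a minimal, hypothetical sketch of that idea, not the authors' implementation; it assumes a PyTorch classifier `model` mapping a batch of token ids to class logits, and a known `vocab_size`.

# Illustrative sketch of UDA-style consistency training with naive
# augmentation (random word substitution). `model` and `vocab_size`
# are assumed; this is not the paper's released code.
import torch
import torch.nn.functional as F

def random_substitution(token_ids: torch.Tensor, vocab_size: int, p: float = 0.1) -> torch.Tensor:
    """Naive augmentation: replace each token id with a uniformly random
    vocabulary item with probability p."""
    noised = token_ids.clone()
    mask = torch.rand_like(token_ids, dtype=torch.float32) < p
    noised[mask] = torch.randint(vocab_size, (int(mask.sum()),), device=token_ids.device)
    return noised

def consistency_loss(model, token_ids: torch.Tensor, vocab_size: int) -> torch.Tensor:
    """KL divergence between predictions on the observed input and on its
    noised counterpart. The prediction on the observed input is treated as
    a fixed target (no gradient), the usual consistency-training setup."""
    with torch.no_grad():
        target = F.softmax(model(token_ids), dim=-1)
    noised_logits = model(random_substitution(token_ids, vocab_size))
    return F.kl_div(F.log_softmax(noised_logits, dim=-1), target, reduction="batchmean")

In the fully supervised variant the abstract highlights, this term would simply be added, with some weighting, to the standard cross-entropy loss on the labeled examples themselves, with no unlabeled pool involved.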