@inproceedings{chopard-etal-2021-learning,
title = "Learning Data Augmentation Schedules for Natural Language Processing",
author = "Chopard, Daphn{\'e} and
Treder, Matthias S. and
Spasi{\'c}, Irena",
editor = "Sedoc, Jo{\~a}o and
Rogers, Anna and
Rumshisky, Anna and
Tafreshi, Shabnam",
booktitle = "Proceedings of the Second Workshop on Insights from Negative Results in NLP",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.insights-1.14",
doi = "10.18653/v1/2021.insights-1.14",
pages = "89--102",
abstract = "Despite its proven efficiency in other fields, data augmentation is less popular in the context of natural language processing (NLP) due to its complexity and limited results. A recent study (Longpre et al., 2020) showed for example that task-agnostic data augmentations fail to consistently boost the performance of pretrained transformers even in low data regimes. In this paper, we investigate whether data-driven augmentation scheduling and the integration of a wider set of transformations can lead to improved performance where fixed and limited policies were unsuccessful. Our results suggest that, while this approach can help the training process in some settings, the improvements are unsubstantial. This negative result is meant to help researchers better understand the limitations of data augmentation for NLP.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chopard-etal-2021-learning">
<titleInfo>
<title>Learning Data Augmentation Schedules for Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daphné</namePart>
<namePart type="family">Chopard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Treder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irena</namePart>
<namePart type="family">Spasić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Insights from Negative Results in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shabnam</namePart>
<namePart type="family">Tafreshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite its proven efficiency in other fields, data augmentation is less popular in the context of natural language processing (NLP) due to its complexity and limited results. A recent study (Longpre et al., 2020) showed for example that task-agnostic data augmentations fail to consistently boost the performance of pretrained transformers even in low data regimes. In this paper, we investigate whether data-driven augmentation scheduling and the integration of a wider set of transformations can lead to improved performance where fixed and limited policies were unsuccessful. Our results suggest that, while this approach can help the training process in some settings, the improvements are unsubstantial. This negative result is meant to help researchers better understand the limitations of data augmentation for NLP.</abstract>
<identifier type="citekey">chopard-etal-2021-learning</identifier>
<identifier type="doi">10.18653/v1/2021.insights-1.14</identifier>
<location>
<url>https://aclanthology.org/2021.insights-1.14</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>89</start>
<end>102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Data Augmentation Schedules for Natural Language Processing
%A Chopard, Daphné
%A Treder, Matthias S.
%A Spasić, Irena
%Y Sedoc, João
%Y Rogers, Anna
%Y Rumshisky, Anna
%Y Tafreshi, Shabnam
%S Proceedings of the Second Workshop on Insights from Negative Results in NLP
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F chopard-etal-2021-learning
%X Despite its proven efficiency in other fields, data augmentation is less popular in the context of natural language processing (NLP) due to its complexity and limited results. A recent study (Longpre et al., 2020) showed for example that task-agnostic data augmentations fail to consistently boost the performance of pretrained transformers even in low data regimes. In this paper, we investigate whether data-driven augmentation scheduling and the integration of a wider set of transformations can lead to improved performance where fixed and limited policies were unsuccessful. Our results suggest that, while this approach can help the training process in some settings, the improvements are unsubstantial. This negative result is meant to help researchers better understand the limitations of data augmentation for NLP.
%R 10.18653/v1/2021.insights-1.14
%U https://aclanthology.org/2021.insights-1.14
%U https://doi.org/10.18653/v1/2021.insights-1.14
%P 89-102
Markdown (Informal)
[Learning Data Augmentation Schedules for Natural Language Processing](https://aclanthology.org/2021.insights-1.14) (Chopard et al., insights 2021)
ACL
Daphné Chopard, Matthias S. Treder, and Irena Spasić. 2021. Learning Data Augmentation Schedules for Natural Language Processing. In Proceedings of the Second Workshop on Insights from Negative Results in NLP, pages 89–102, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.