@inproceedings{piedboeuf-langlais-2023-chatgpt,
title = "Is {C}hat{GPT} the ultimate Data Augmentation Algorithm?",
author = "Piedboeuf, Fr{\'e}d{\'e}ric and
Langlais, Philippe",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.1044",
doi = "10.18653/v1/2023.findings-emnlp.1044",
pages = "15606--15615",
abstract = "In the aftermath of GPT-3.5, commonly known as ChatGPT, research have attempted to assess its capacity for lowering annotation cost, either by doing zero-shot learning, generating new data, or replacing human annotators. Some studies have also investigated its use for data augmentation (DA), but only in limited contexts, which still leaves the question of how ChatGPT performs compared to state-of-the-art algorithms. In this paper, we use ChatGPT to create new data both with paraphrasing and with zero-shot generation, and compare it to seven other algorithms. We show that while ChatGPT performs exceptionally well on some simpler data, it overall does not perform better than the other algorithms, yet demands a much larger implication from the practitioner due to the ChatGPT often refusing to answer due to sensitive content in the datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="piedboeuf-langlais-2023-chatgpt">
<titleInfo>
<title>Is ChatGPT the ultimate Data Augmentation Algorithm?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Piedboeuf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Langlais</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the aftermath of GPT-3.5, commonly known as ChatGPT, research have attempted to assess its capacity for lowering annotation cost, either by doing zero-shot learning, generating new data, or replacing human annotators. Some studies have also investigated its use for data augmentation (DA), but only in limited contexts, which still leaves the question of how ChatGPT performs compared to state-of-the-art algorithms. In this paper, we use ChatGPT to create new data both with paraphrasing and with zero-shot generation, and compare it to seven other algorithms. We show that while ChatGPT performs exceptionally well on some simpler data, it overall does not perform better than the other algorithms, yet demands a much larger implication from the practitioner due to the ChatGPT often refusing to answer due to sensitive content in the datasets.</abstract>
<identifier type="citekey">piedboeuf-langlais-2023-chatgpt</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.1044</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.1044</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>15606</start>
<end>15615</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is ChatGPT the ultimate Data Augmentation Algorithm?
%A Piedboeuf, Frédéric
%A Langlais, Philippe
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F piedboeuf-langlais-2023-chatgpt
%X In the aftermath of GPT-3.5, commonly known as ChatGPT, research have attempted to assess its capacity for lowering annotation cost, either by doing zero-shot learning, generating new data, or replacing human annotators. Some studies have also investigated its use for data augmentation (DA), but only in limited contexts, which still leaves the question of how ChatGPT performs compared to state-of-the-art algorithms. In this paper, we use ChatGPT to create new data both with paraphrasing and with zero-shot generation, and compare it to seven other algorithms. We show that while ChatGPT performs exceptionally well on some simpler data, it overall does not perform better than the other algorithms, yet demands a much larger implication from the practitioner due to the ChatGPT often refusing to answer due to sensitive content in the datasets.
%R 10.18653/v1/2023.findings-emnlp.1044
%U https://aclanthology.org/2023.findings-emnlp.1044
%U https://doi.org/10.18653/v1/2023.findings-emnlp.1044
%P 15606-15615
Markdown (Informal)
[Is ChatGPT the ultimate Data Augmentation Algorithm?](https://aclanthology.org/2023.findings-emnlp.1044) (Piedboeuf & Langlais, Findings 2023)
ACL