% Conference paper in the LREC-COLING 2024 proceedings
% (ACL Anthology record 2024.lrec-main.1325).
@inproceedings{kim-etal-2024-stage,
  author    = {Kim, Ho-Seung and
               Kang, YongHoon and
               Lee, Jee-Hyong},
  title     = {{STAGE}: Simple Text Data Augmentation by Graph Exploration},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {15238--15256},
  url       = {https://aclanthology.org/2024.lrec-main.1325},
  abstract  = {Pre-trained language models (PLMs) are widely used for various tasks, but fine-tuning them requires sufficient data. Data augmentation approaches have been proposed as alternatives, but they vary in complexity, cost, and performance. To address these challenges, we propose STAGE (Simple Text Data Augmentation by Graph Exploration), a highly effective method for data augmentation. STAGE utilizes simple modification operations such as insertion, deletion, replacement, and swap. However, what distinguishes STAGE lies in the selection of optimal words for each modification. This is achieved by leveraging a word-relation graph called the co-graph. The co-graph takes into account both word frequency and co-occurrence, providing valuable information for operand selection. To assess the performance of STAGE, we conduct evaluations using seven representative datasets and three different PLMs. Our results demonstrate the effectiveness of STAGE across diverse data domains, varying data sizes, and different PLMs. Also, STAGE demonstrates superior performance when compared to previous methods that use simple modification operations or large language models like GPT3.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="kim-etal-2024-stage">
    <titleInfo>
      <title>STAGE: Simple Text Data Augmentation by Graph Exploration</title>
    </titleInfo>

    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Ho-Seung</namePart>
      <namePart type="family">Kim</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">YongHoon</namePart>
      <namePart type="family">Kang</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Jee-Hyong</namePart>
      <namePart type="family">Lee</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>

    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Pre-trained language models (PLMs) are widely used for various tasks, but fine-tuning them requires sufficient data. Data augmentation approaches have been proposed as alternatives, but they vary in complexity, cost, and performance. To address these challenges, we propose STAGE (Simple Text Data Augmentation by Graph Exploration), a highly effective method for data augmentation. STAGE utilizes simple modification operations such as insertion, deletion, replacement, and swap. However, what distinguishes STAGE lies in the selection of optimal words for each modification. This is achieved by leveraging a word-relation graph called the co-graph. The co-graph takes into account both word frequency and co-occurrence, providing valuable information for operand selection. To assess the performance of STAGE, we conduct evaluations using seven representative datasets and three different PLMs. Our results demonstrate the effectiveness of STAGE across diverse data domains, varying data sizes, and different PLMs. Also, STAGE demonstrates superior performance when compared to previous methods that use simple modification operations or large language models like GPT3.</abstract>

    <!-- Citation key and landing-page URL for this record. -->
    <identifier type="citekey">kim-etal-2024-stage</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.1325</url>
    </location>

    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>15238</start>
        <end>15256</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T STAGE: Simple Text Data Augmentation by Graph Exploration
%A Kim, Ho-Seung
%A Kang, YongHoon
%A Lee, Jee-Hyong
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kim-etal-2024-stage
%X Pre-trained language models (PLMs) are widely used for various tasks, but fine-tuning them requires sufficient data. Data augmentation approaches have been proposed as alternatives, but they vary in complexity, cost, and performance. To address these challenges, we propose STAGE (Simple Text Data Augmentation by Graph Exploration), a highly effective method for data augmentation. STAGE utilizes simple modification operations such as insertion, deletion, replacement, and swap. However, what distinguishes STAGE lies in the selection of optimal words for each modification. This is achieved by leveraging a word-relation graph called the co-graph. The co-graph takes into account both word frequency and co-occurrence, providing valuable information for operand selection. To assess the performance of STAGE, we conduct evaluations using seven representative datasets and three different PLMs. Our results demonstrate the effectiveness of STAGE across diverse data domains, varying data sizes, and different PLMs. Also, STAGE demonstrates superior performance when compared to previous methods that use simple modification operations or large language models like GPT3.
%U https://aclanthology.org/2024.lrec-main.1325
%P 15238-15256
Markdown (Informal)
[STAGE: Simple Text Data Augmentation by Graph Exploration](https://aclanthology.org/2024.lrec-main.1325) (Kim et al., LREC-COLING 2024)
ACL
- Ho-Seung Kim, YongHoon Kang, and Jee-Hyong Lee. 2024. STAGE: Simple Text Data Augmentation by Graph Exploration. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 15238–15256, Torino, Italia. ELRA and ICCL.