@article{zhang-etal-2024-llm-assisted,
title = "{LLM}-Assisted Data Augmentation for {C}hinese Dialogue-Level Dependency Parsing",
author = "Zhang, Meishan and
Jiang, Gongyao and
Liu, Shuang and
Chen, Jing and
Zhang, Min",
journal = "Computational Linguistics",
volume = "50",
number = "3",
month = sep,
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2024.cl-3.2",
doi = "10.1162/coli_a_00515",
pages = "867--891",
abstract = "Dialogue-level dependency parsing, despite its growing academic interest, often encounters underperformance issues due to resource shortages. A potential solution to this challenge is data augmentation. In recent years, large language models (LLMs) have demonstrated strong capabilities in generation, which can facilitate data augmentation greatly. In this study, we focus on Chinese dialogue-level dependency parsing, presenting three simple and effective strategies with LLM to augment the original training instances, namely word-level, syntax-level, and discourse-level augmentations, respectively. These strategies enable LLMs to either preserve or modify dependency structures, thereby assuring accuracy while increasing the diversity of instances at different levels. We conduct experiments on the benchmark dataset released by Jiang et al. (2023) to validate our approach. Results show that our method can greatly boost the parsing performance in various settings, particularly in dependencies among elementary discourse units. Lastly, we provide in-depth analysis to show the key points of our data augmentation strategies.",
}