@inproceedings{zhang-etal-2025-data,
title = "Data Augmentation for Cross-domain Parsing via Lightweight {LLM} Generation and Tree Hybridization",
author = "Zhang, Ziyan and
Hou, Yang and
Gong, Chen and
Li, Zhenghua",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.744/",
pages = "11235--11247",
abstract = "Cross-domain constituency parsing remains a challenging task due to the lack of high-quality out-of-domain data. In this paper, we propose a data augmentation method via lightweight large language model (LLM) generation and tree hybridization. We utilize LLM to generate phrase structures (subtrees) for the target domain by incorporating grammar rules and lexical head information into the prompt. To better leverage LLM-generated target-domain subtrees, we hybridize them with existing source-domain subtrees to efficiently produce a large number of structurally diverse instances. Experimental results demonstrate that our method achieves significant improvements on five target domains with a lightweight LLM generation cost."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-data">
<titleInfo>
<title>Data Augmentation for Cross-domain Parsing via Lightweight LLM Generation and Tree Hybridization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ziyan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenghua</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Di Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cross-domain constituency parsing remains a challenging task due to the lack of high-quality out-of-domain data. In this paper, we propose a data augmentation method via lightweight large language model (LLM) generation and tree hybridization. We utilize LLM to generate phrase structures (subtrees) for the target domain by incorporating grammar rules and lexical head information into the prompt. To better leverage LLM-generated target-domain subtrees, we hybridize them with existing source-domain subtrees to efficiently produce a large number of structurally diverse instances. Experimental results demonstrate that our method achieves significant improvements on five target domains with a lightweight LLM generation cost.</abstract>
<identifier type="citekey">zhang-etal-2025-data</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.744/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>11235</start>
<end>11247</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation for Cross-domain Parsing via Lightweight LLM Generation and Tree Hybridization
%A Zhang, Ziyan
%A Hou, Yang
%A Gong, Chen
%A Li, Zhenghua
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F zhang-etal-2025-data
%X Cross-domain constituency parsing remains a challenging task due to the lack of high-quality out-of-domain data. In this paper, we propose a data augmentation method via lightweight large language model (LLM) generation and tree hybridization. We utilize LLM to generate phrase structures (subtrees) for the target domain by incorporating grammar rules and lexical head information into the prompt. To better leverage LLM-generated target-domain subtrees, we hybridize them with existing source-domain subtrees to efficiently produce a large number of structurally diverse instances. Experimental results demonstrate that our method achieves significant improvements on five target domains with a lightweight LLM generation cost.
%U https://aclanthology.org/2025.coling-main.744/
%P 11235-11247
Markdown (Informal)
[Data Augmentation for Cross-domain Parsing via Lightweight LLM Generation and Tree Hybridization](https://aclanthology.org/2025.coling-main.744/) (Zhang et al., COLING 2025)
ACL