@inproceedings{wang-etal-2023-towards-better,
title = "Towards Better Hierarchical Text Classification with Data Generation",
author = "Wang, Yue and
Qiao, Dan and
Li, Juntao and
Chang, Jinxiong and
Zhang, Qishen and
Liu, Zhongyi and
Zhang, Guannan and
Zhang, Min",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.489",
doi = "10.18653/v1/2023.findings-acl.489",
pages = "7722--7739",
abstract = "Hierarchical text classification (HTC) focuses on classifying one text into multiple labels, which are organized as a hierarchical taxonomy. Due to its wide involution in realistic scenarios, HTC attracts long-term attention from both industry and academia. However, the high cost of hierarchical multi-label annotation makes HTC suffer from the data scarcity problem. In view of the difficulty in balancing the controllability of multiple structural labels and text diversity, automatically generating high-quality data for HTC is challenging and under-explored. To fill this blank, we propose a novel data generation framework tailored for HTC, which can achieve both label controllability and text diversity by extracting high-quality semantic-level and phrase-level hierarchical label information. Experimental results on three benchmarks demonstrate that, compared with existing data augmentation methods, the data generated from our method can bring the most significant performance improvements of several strong HTC models. Extensive analysis confirms that the improvements yielded by our proposed method do correlate to the enhancement of label controllability and text diversity.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2023-towards-better">
<titleInfo>
<title>Towards Better Hierarchical Text Classification with Data Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Qiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinxiong</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qishen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhongyi</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guannan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hierarchical text classification (HTC) focuses on classifying one text into multiple labels, which are organized as a hierarchical taxonomy. Due to its wide involution in realistic scenarios, HTC attracts long-term attention from both industry and academia. However, the high cost of hierarchical multi-label annotation makes HTC suffer from the data scarcity problem. In view of the difficulty in balancing the controllability of multiple structural labels and text diversity, automatically generating high-quality data for HTC is challenging and under-explored. To fill this blank, we propose a novel data generation framework tailored for HTC, which can achieve both label controllability and text diversity by extracting high-quality semantic-level and phrase-level hierarchical label information. Experimental results on three benchmarks demonstrate that, compared with existing data augmentation methods, the data generated from our method can bring the most significant performance improvements of several strong HTC models. Extensive analysis confirms that the improvements yielded by our proposed method do correlate to the enhancement of label controllability and text diversity.</abstract>
<identifier type="citekey">wang-etal-2023-towards-better</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.489</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.489</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>7722</start>
<end>7739</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Better Hierarchical Text Classification with Data Generation
%A Wang, Yue
%A Qiao, Dan
%A Li, Juntao
%A Chang, Jinxiong
%A Zhang, Qishen
%A Liu, Zhongyi
%A Zhang, Guannan
%A Zhang, Min
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F wang-etal-2023-towards-better
%X Hierarchical text classification (HTC) focuses on classifying one text into multiple labels, which are organized as a hierarchical taxonomy. Due to its wide involution in realistic scenarios, HTC attracts long-term attention from both industry and academia. However, the high cost of hierarchical multi-label annotation makes HTC suffer from the data scarcity problem. In view of the difficulty in balancing the controllability of multiple structural labels and text diversity, automatically generating high-quality data for HTC is challenging and under-explored. To fill this blank, we propose a novel data generation framework tailored for HTC, which can achieve both label controllability and text diversity by extracting high-quality semantic-level and phrase-level hierarchical label information. Experimental results on three benchmarks demonstrate that, compared with existing data augmentation methods, the data generated from our method can bring the most significant performance improvements of several strong HTC models. Extensive analysis confirms that the improvements yielded by our proposed method do correlate to the enhancement of label controllability and text diversity.
%R 10.18653/v1/2023.findings-acl.489
%U https://aclanthology.org/2023.findings-acl.489
%U https://doi.org/10.18653/v1/2023.findings-acl.489
%P 7722-7739
Markdown (Informal)
[Towards Better Hierarchical Text Classification with Data Generation](https://aclanthology.org/2023.findings-acl.489) (Wang et al., Findings 2023)
ACL
- Yue Wang, Dan Qiao, Juntao Li, Jinxiong Chang, Qishen Zhang, Zhongyi Liu, Guannan Zhang, and Min Zhang. 2023. Towards Better Hierarchical Text Classification with Data Generation. In Findings of the Association for Computational Linguistics: ACL 2023, pages 7722–7739, Toronto, Canada. Association for Computational Linguistics.