@inproceedings{zhang-etal-2024-text-segmentation,
title = "From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts",
author = "Zhang, Wang and
Wang, Xin and
Wang, Qian and
Deng, Tao and
Wu, Xiaoru",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.402",
pages = "6864--6873",
abstract = "Multi-label text classification (MLTC) is an important task in the field of natural language processing. Most existing models rely on high-quality text representations provided by pre-trained language models (PLMs). They hence face the challenge of input length limitation caused by PLMs, when dealing with long texts. In light of this, we introduce a comprehensive approach to multi-label long text classification. We propose a text segmentation algorithm, which guarantees to produce the optimal segmentation, to address the issue of input length limitation caused by PLMs. We incorporate external knowledge, labels{'} co-occurrence relations, and attention mechanisms in representation learning to enhance both text and label representations. Our method{'}s effectiveness is validated through extensive experiments on various MLTC datasets, unraveling the intricate correlations between texts and labels.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2024-text-segmentation">
<titleInfo>
<title>From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qian</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Deng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoru</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multi-label text classification (MLTC) is an important task in the field of natural language processing. Most existing models rely on high-quality text representations provided by pre-trained language models (PLMs). They hence face the challenge of input length limitation caused by PLMs, when dealing with long texts. In light of this, we introduce a comprehensive approach to multi-label long text classification. We propose a text segmentation algorithm, which guarantees to produce the optimal segmentation, to address the issue of input length limitation caused by PLMs. We incorporate external knowledge, labels’ co-occurrence relations, and attention mechanisms in representation learning to enhance both text and label representations. Our method’s effectiveness is validated through extensive experiments on various MLTC datasets, unraveling the intricate correlations between texts and labels.</abstract>
<identifier type="citekey">zhang-etal-2024-text-segmentation</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.402</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>6864</start>
<end>6873</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts
%A Zhang, Wang
%A Wang, Xin
%A Wang, Qian
%A Deng, Tao
%A Wu, Xiaoru
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhang-etal-2024-text-segmentation
%X Multi-label text classification (MLTC) is an important task in the field of natural language processing. Most existing models rely on high-quality text representations provided by pre-trained language models (PLMs). They hence face the challenge of input length limitation caused by PLMs, when dealing with long texts. In light of this, we introduce a comprehensive approach to multi-label long text classification. We propose a text segmentation algorithm, which guarantees to produce the optimal segmentation, to address the issue of input length limitation caused by PLMs. We incorporate external knowledge, labels’ co-occurrence relations, and attention mechanisms in representation learning to enhance both text and label representations. Our method’s effectiveness is validated through extensive experiments on various MLTC datasets, unraveling the intricate correlations between texts and labels.
%U https://aclanthology.org/2024.findings-emnlp.402
%P 6864-6873
Markdown (Informal)
[From Text Segmentation to Enhanced Representation Learning: A Novel Approach to Multi-Label Classification for Long Texts](https://aclanthology.org/2024.findings-emnlp.402) (Zhang et al., Findings 2024)
ACL