@inproceedings{chen-lin-2024-random,
title = "Random Label Forests: An Ensemble Method with Label Subsampling For Extreme Multi-Label Problems",
author = "Chen, Sheng-Wei and
Lin, Chih-Jen",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.825",
pages = "14107--14119",
abstract = "Text classification is one of the essential topics in natural language processing, and each text is often associated with multiple labels. Recently, the number of labels has become larger and larger, especially in the applications of e-commerce, so handling text-related e-commerce problems further requires a large memory space in many existing multi-label learning methods. To address the space concern, utilizing a distributed system to share that large memory requirement is a possible solution. We propose {``}random label forests,{''} a distributed ensemble method with label subsampling, for handling extremely large-scale labels. Random label forests can reduce memory usage per computer while keeping competitive performances over real-world data sets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-lin-2024-random">
<titleInfo>
<title>Random Label Forests: An Ensemble Method with Label Subsampling For Extreme Multi-Label Problems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sheng-Wei</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chih-Jen</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Text classification is one of the essential topics in natural language processing, and each text is often associated with multiple labels. Recently, the number of labels has become larger and larger, especially in the applications of e-commerce, so handling text-related e-commerce problems further requires a large memory space in many existing multi-label learning methods. To address the space concern, utilizing a distributed system to share that large memory requirement is a possible solution. We propose “random label forests,” a distributed ensemble method with label subsampling, for handling extremely large-scale labels. Random label forests can reduce memory usage per computer while keeping competitive performances over real-world data sets.</abstract>
<identifier type="citekey">chen-lin-2024-random</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.825</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14107</start>
<end>14119</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Random Label Forests: An Ensemble Method with Label Subsampling For Extreme Multi-Label Problems
%A Chen, Sheng-Wei
%A Lin, Chih-Jen
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F chen-lin-2024-random
%X Text classification is one of the essential topics in natural language processing, and each text is often associated with multiple labels. Recently, the number of labels has become larger and larger, especially in the applications of e-commerce, so handling text-related e-commerce problems further requires a large memory space in many existing multi-label learning methods. To address the space concern, utilizing a distributed system to share that large memory requirement is a possible solution. We propose “random label forests,” a distributed ensemble method with label subsampling, for handling extremely large-scale labels. Random label forests can reduce memory usage per computer while keeping competitive performances over real-world data sets.
%U https://aclanthology.org/2024.findings-emnlp.825
%P 14107-14119
Markdown (Informal)
[Random Label Forests: An Ensemble Method with Label Subsampling For Extreme Multi-Label Problems](https://aclanthology.org/2024.findings-emnlp.825) (Chen & Lin, Findings 2024)
ACL