@inproceedings{nishida-etal-2024-multi,
title = "Multi-label Learning with Random Circular Vectors",
author = "Nishida, Ken and
Machi, Kojiro and
Onishi, Kazuma and
Hayashi, Katsuhiko and
Kamigaito, Hidetaka",
editor = "Zhao, Chen and
Mosbach, Marius and
Atanasova, Pepa and
Goldfarb-Tarrent, Seraphina and
Hase, Peter and
Hosseini, Arian and
Elbayad, Maha and
Pezzelle, Sandro and
Mozes, Maximilian",
booktitle = "Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.repl4nlp-1.18",
pages = "245--255",
abstract = "The extreme multi-label classification (XMC) task involves learning a classifier that can predict from a large label set the most relevant subset of labels for a data instance. While deep neural networks (DNNs) have demonstrated remarkable success in XMC problems, the task is still challenging because it must deal with a large number of output labels, which make the DNN training computationally expensive. This paper addresses the issue by exploring the use of random circular vectors, where each vector component is represented as a complex amplitude. In our framework, we can develop an output layer and loss function of DNNs for XMC by representing the final output layer as a fully connected layer that directly predicts a low-dimensional circular vector encoding a set of labels for a data instance. We conducted experiments on synthetic datasets to verify that circular vectors have better label encoding capacity and retrieval ability than normal real-valued vectors. Then, we conducted experiments on actual XMC datasets and found that these appealing properties of circular vectors contribute to significant improvements in task performance compared with a previous model using random real-valued vectors, while reducing the size of the output layers by up to 99{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nishida-etal-2024-multi">
<titleInfo>
<title>Multi-label Learning with Random Circular Vectors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ken</namePart>
<namePart type="family">Nishida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kojiro</namePart>
<namePart type="family">Machi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kazuma</namePart>
<namePart type="family">Onishi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katsuhiko</namePart>
<namePart type="family">Hayashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hidetaka</namePart>
<namePart type="family">Kamigaito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marius</namePart>
<namePart type="family">Mosbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pepa</namePart>
<namePart type="family">Atanasova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seraphina</namePart>
<namePart type="family">Goldfarb-Tarrent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Hase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arian</namePart>
<namePart type="family">Hosseini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maha</namePart>
<namePart type="family">Elbayad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandro</namePart>
<namePart type="family">Pezzelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Mozes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The extreme multi-label classification (XMC) task involves learning a classifier that can predict from a large label set the most relevant subset of labels for a data instance. While deep neural networks (DNNs) have demonstrated remarkable success in XMC problems, the task is still challenging because it must deal with a large number of output labels, which make the DNN training computationally expensive. This paper addresses the issue by exploring the use of random circular vectors, where each vector component is represented as a complex amplitude. In our framework, we can develop an output layer and loss function of DNNs for XMC by representing the final output layer as a fully connected layer that directly predicts a low-dimensional circular vector encoding a set of labels for a data instance. We conducted experiments on synthetic datasets to verify that circular vectors have better label encoding capacity and retrieval ability than normal real-valued vectors. Then, we conducted experiments on actual XMC datasets and found that these appealing properties of circular vectors contribute to significant improvements in task performance compared with a previous model using random real-valued vectors, while reducing the size of the output layers by up to 99%.</abstract>
<identifier type="citekey">nishida-etal-2024-multi</identifier>
<location>
<url>https://aclanthology.org/2024.repl4nlp-1.18</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>245</start>
<end>255</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-label Learning with Random Circular Vectors
%A Nishida, Ken
%A Machi, Kojiro
%A Onishi, Kazuma
%A Hayashi, Katsuhiko
%A Kamigaito, Hidetaka
%Y Zhao, Chen
%Y Mosbach, Marius
%Y Atanasova, Pepa
%Y Goldfarb-Tarrent, Seraphina
%Y Hase, Peter
%Y Hosseini, Arian
%Y Elbayad, Maha
%Y Pezzelle, Sandro
%Y Mozes, Maximilian
%S Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F nishida-etal-2024-multi
%X The extreme multi-label classification (XMC) task involves learning a classifier that can predict from a large label set the most relevant subset of labels for a data instance. While deep neural networks (DNNs) have demonstrated remarkable success in XMC problems, the task is still challenging because it must deal with a large number of output labels, which make the DNN training computationally expensive. This paper addresses the issue by exploring the use of random circular vectors, where each vector component is represented as a complex amplitude. In our framework, we can develop an output layer and loss function of DNNs for XMC by representing the final output layer as a fully connected layer that directly predicts a low-dimensional circular vector encoding a set of labels for a data instance. We conducted experiments on synthetic datasets to verify that circular vectors have better label encoding capacity and retrieval ability than normal real-valued vectors. Then, we conducted experiments on actual XMC datasets and found that these appealing properties of circular vectors contribute to significant improvements in task performance compared with a previous model using random real-valued vectors, while reducing the size of the output layers by up to 99%.
%U https://aclanthology.org/2024.repl4nlp-1.18
%P 245-255
Markdown (Informal)
[Multi-label Learning with Random Circular Vectors](https://aclanthology.org/2024.repl4nlp-1.18) (Nishida et al., RepL4NLP-WS 2024)
ACL
- Ken Nishida, Kojiro Machi, Kazuma Onishi, Katsuhiko Hayashi, and Hidetaka Kamigaito. 2024. Multi-label Learning with Random Circular Vectors. In Proceedings of the 9th Workshop on Representation Learning for NLP (RepL4NLP-2024), pages 245–255, Bangkok, Thailand. Association for Computational Linguistics.