@inproceedings{buchner-etal-2024-prompt,
title = "Prompt Tuned Embedding Classification for Industry Sector Allocation",
author = "Buchner, Valentin and
Cao, Lele and
Kalo, Jan-Christoph and
Von Ehrenheim, Vilhelm",
editor = "Yang, Yi and
Davani, Aida and
Sil, Avi and
Kumar, Anoop",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-industry.10",
doi = "10.18653/v1/2024.naacl-industry.10",
pages = "108--118",
abstract = "We introduce Prompt Tuned Embedding Classification (PTEC) for classifying companies within an investment firm{'}s proprietary industry taxonomy, supporting their thematic investment strategy. PTEC assigns companies to the sectors they primarily operate in, conceptualizing this process as a multi-label text classification task. Prompt Tuning, usually deployed as a text-to-text (T2T) classification approach, ensures low computational cost while maintaining high task performance. However, T2T classification has limitations on multi-label tasks due to the generation of non-existing labels, permutation invariance of the label sequence, and a lack of confidence scores. PTEC addresses these limitations by utilizing a classification head in place of the Large Language Models (LLMs) language head. PTEC surpasses both baselines and human performance while lowering computational demands. This indicates the continuing need to adapt state-of-the-art methods to domain-specific tasks, even in the era of LLMs with strong generalization abilities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="buchner-etal-2024-prompt">
    <titleInfo>
      <title>Prompt Tuned Embedding Classification for Industry Sector Allocation</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Valentin</namePart>
      <namePart type="family">Buchner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lele</namePart>
      <namePart type="family">Cao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jan-Christoph</namePart>
      <namePart type="family">Kalo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vilhelm</namePart>
      <namePart type="family">Von Ehrenheim</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yi</namePart>
        <namePart type="family">Yang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aida</namePart>
        <namePart type="family">Davani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Avi</namePart>
        <namePart type="family">Sil</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anoop</namePart>
        <namePart type="family">Kumar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We introduce Prompt Tuned Embedding Classification (PTEC) for classifying companies within an investment firm’s proprietary industry taxonomy, supporting its thematic investment strategy. PTEC assigns companies to the sectors they primarily operate in, conceptualizing this process as a multi-label text classification task. Prompt Tuning, usually deployed as a text-to-text (T2T) classification approach, ensures low computational cost while maintaining high task performance. However, T2T classification has limitations on multi-label tasks due to the generation of non-existing labels, permutation invariance of the label sequence, and a lack of confidence scores. PTEC addresses these limitations by utilizing a classification head in place of the Large Language Model’s (LLM’s) language head. PTEC surpasses both baselines and human performance while lowering computational demands. This indicates the continuing need to adapt state-of-the-art methods to domain-specific tasks, even in the era of LLMs with strong generalization abilities.</abstract>
    <identifier type="citekey">buchner-etal-2024-prompt</identifier>
    <identifier type="doi">10.18653/v1/2024.naacl-industry.10</identifier>
    <location>
      <url>https://aclanthology.org/2024.naacl-industry.10</url>
    </location>
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>108</start>
        <end>118</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Prompt Tuned Embedding Classification for Industry Sector Allocation
%A Buchner, Valentin
%A Cao, Lele
%A Kalo, Jan-Christoph
%A Von Ehrenheim, Vilhelm
%Y Yang, Yi
%Y Davani, Aida
%Y Sil, Avi
%Y Kumar, Anoop
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F buchner-etal-2024-prompt
%X We introduce Prompt Tuned Embedding Classification (PTEC) for classifying companies within an investment firm’s proprietary industry taxonomy, supporting its thematic investment strategy. PTEC assigns companies to the sectors they primarily operate in, conceptualizing this process as a multi-label text classification task. Prompt Tuning, usually deployed as a text-to-text (T2T) classification approach, ensures low computational cost while maintaining high task performance. However, T2T classification has limitations on multi-label tasks due to the generation of non-existing labels, permutation invariance of the label sequence, and a lack of confidence scores. PTEC addresses these limitations by utilizing a classification head in place of the Large Language Model’s (LLM’s) language head. PTEC surpasses both baselines and human performance while lowering computational demands. This indicates the continuing need to adapt state-of-the-art methods to domain-specific tasks, even in the era of LLMs with strong generalization abilities.
%R 10.18653/v1/2024.naacl-industry.10
%U https://aclanthology.org/2024.naacl-industry.10
%U https://doi.org/10.18653/v1/2024.naacl-industry.10
%P 108-118
Markdown (Informal)
[Prompt Tuned Embedding Classification for Industry Sector Allocation](https://aclanthology.org/2024.naacl-industry.10) (Buchner et al., NAACL 2024)
ACL
- Valentin Buchner, Lele Cao, Jan-Christoph Kalo, and Vilhelm Von Ehrenheim. 2024. Prompt Tuned Embedding Classification for Industry Sector Allocation. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 6: Industry Track), pages 108–118, Mexico City, Mexico. Association for Computational Linguistics.
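
The abstract describes the core of PTEC: a trainable soft prompt is tuned while the LLM stays frozen, and a multi-label classification head replaces the language-modeling head so the model can only emit labels from the fixed taxonomy, with a confidence score per label. As a rough illustration of that idea only, here is a minimal PyTorch sketch; the `backbone` interface, pooling choice, prompt length, and all names here are assumptions for exposition, not the authors' implementation.

```python
import torch
import torch.nn as nn


class PTECSketch(nn.Module):
    """Hypothetical sketch of the PTEC idea from the abstract: a trainable
    soft prompt is prepended to the input embeddings of a frozen LLM, and a
    multi-label classification head replaces the language-modeling head."""

    def __init__(self, backbone: nn.Module, hidden_size: int,
                 num_labels: int, prompt_len: int = 20):
        super().__init__()
        # Assumed backbone: maps (batch, seq, hidden) embeddings to
        # (batch, seq, hidden) hidden states. Frozen, as in prompt tuning.
        self.backbone = backbone
        for p in self.backbone.parameters():
            p.requires_grad = False
        # Trainable soft-prompt embeddings: the only prompt-tuning parameters.
        self.soft_prompt = nn.Parameter(torch.randn(prompt_len, hidden_size) * 0.02)
        # Classification head in place of the language head: one logit per
        # sector label, so sigmoid(logit) yields a per-label confidence score.
        self.head = nn.Linear(hidden_size, num_labels)

    def forward(self, input_embeds: torch.Tensor) -> torch.Tensor:
        batch = input_embeds.size(0)
        prompt = self.soft_prompt.unsqueeze(0).expand(batch, -1, -1)
        hidden = self.backbone(torch.cat([prompt, input_embeds], dim=1))
        pooled = hidden[:, -1]  # last-token pooling (an assumption)
        return self.head(pooled)  # raw logits, trained with BCEWithLogitsLoss


# Multi-label training makes an independent binary decision per label, which
# sidesteps the T2T failure modes the abstract lists: generated labels that
# do not exist in the taxonomy, sensitivity to label order, and the absence
# of confidence scores.
loss_fn = nn.BCEWithLogitsLoss()
```

At inference, thresholding the sigmoid outputs (e.g. above 0.5) assigns each company the subset of taxonomy sectors it primarily operates in, matching the multi-label framing in the abstract.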