@inproceedings{van-nooten-kosar-2024-advancing,
title = "Advancing {CSR} Theme and Topic Classification: {LLM}s and Training Enhancement Insights",
author = "Van Nooten, Jens and
Kosar, Andriy",
editor = "Chen, Chung-Chi and
Liu, Xiaomo and
Hahn, Udo and
Nourbakhsh, Armineh and
Ma, Zhiqiang and
Smiley, Charese and
Hoste, Veronique and
Das, Sanjiv Ranjan and
Li, Manling and
Ghassemi, Mohammad and
Huang, Hen-Hsen and
Takamura, Hiroya and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.finnlp-1.33",
pages = "292--305",
abstract = "In this paper, we present our results of the classification of Corporate Social Responsibility (CSR) Themes and Topics shared task, which encompasses cross-lingual multi-class classification and monolingual multi-label classification. We examine the performance of multiple machine learning (ML) models, ranging from classical models to pre-trained large language models (LLMs), and assess the effectiveness of Data Augmentation (DA), Data Translation (DT), and Contrastive Learning (CL). We find that state-of-the-art generative LLMs in a zero-shot setup still fall behind on more complex classification tasks compared to fine-tuning local models with enhanced datasets and additional training objectives. Our work provides a wide array of comparisons and highlights the relevance of utilizing smaller language models for more complex classification tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-nooten-kosar-2024-advancing">
<titleInfo>
<title>Advancing CSR Theme and Topic Classification: LLMs and Training Enhancement Insights</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jens</namePart>
<namePart type="family">Van Nooten</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andriy</namePart>
<namePart type="family">Kosar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaomo</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Armineh</namePart>
<namePart type="family">Nourbakhsh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charese</namePart>
<namePart type="family">Smiley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjiv</namePart>
<namePart type="given">Ranjan</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Ghassemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hen-Hsen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
  <abstract>In this paper, we present our results for the Corporate Social Responsibility (CSR) Themes and Topics classification shared task, which encompasses cross-lingual multi-class classification and monolingual multi-label classification. We examine the performance of multiple machine learning (ML) models, ranging from classical models to pre-trained large language models (LLMs), and assess the effectiveness of Data Augmentation (DA), Data Translation (DT), and Contrastive Learning (CL). We find that state-of-the-art generative LLMs in a zero-shot setup still fall behind fine-tuned local models with enhanced datasets and additional training objectives on more complex classification tasks. Our work provides a wide array of comparisons and highlights the relevance of using smaller language models for more complex classification tasks.</abstract>
<identifier type="citekey">van-nooten-kosar-2024-advancing</identifier>
<location>
<url>https://aclanthology.org/2024.finnlp-1.33</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>292</start>
<end>305</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advancing CSR Theme and Topic Classification: LLMs and Training Enhancement Insights
%A Van Nooten, Jens
%A Kosar, Andriy
%Y Chen, Chung-Chi
%Y Liu, Xiaomo
%Y Hahn, Udo
%Y Nourbakhsh, Armineh
%Y Ma, Zhiqiang
%Y Smiley, Charese
%Y Hoste, Veronique
%Y Das, Sanjiv Ranjan
%Y Li, Manling
%Y Ghassemi, Mohammad
%Y Huang, Hen-Hsen
%Y Takamura, Hiroya
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing
%D 2024
%8 May
%I Association for Computational Linguistics
%C Torino, Italia
%F van-nooten-kosar-2024-advancing
%X In this paper, we present our results for the Corporate Social Responsibility (CSR) Themes and Topics classification shared task, which encompasses cross-lingual multi-class classification and monolingual multi-label classification. We examine the performance of multiple machine learning (ML) models, ranging from classical models to pre-trained large language models (LLMs), and assess the effectiveness of Data Augmentation (DA), Data Translation (DT), and Contrastive Learning (CL). We find that state-of-the-art generative LLMs in a zero-shot setup still fall behind fine-tuned local models with enhanced datasets and additional training objectives on more complex classification tasks. Our work provides a wide array of comparisons and highlights the relevance of using smaller language models for more complex classification tasks.
%U https://aclanthology.org/2024.finnlp-1.33
%P 292-305
Markdown (Informal)
[Advancing CSR Theme and Topic Classification: LLMs and Training Enhancement Insights](https://aclanthology.org/2024.finnlp-1.33) (Van Nooten & Kosar, FinNLP 2024)
ACL
Jens Van Nooten and Andriy Kosar. 2024. Advancing CSR Theme and Topic Classification: LLMs and Training Enhancement Insights. In Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing, pages 292–305, Torino, Italia. Association for Computational Linguistics.