@inproceedings{gallipoli-etal-2024-keyword,
title = "Keyword-based Annotation of Visually-Rich Document Content for Trend and Risk Analysis Using Large Language Models",
author = "Gallipoli, Giuseppe and
Papicchio, Simone and
Vaiani, Lorenzo and
Cagliero, Luca and
Miola, Arianna and
Borghi, Daniele",
editor = "Chen, Chung-Chi and
Liu, Xiaomo and
Hahn, Udo and
Nourbakhsh, Armineh and
Ma, Zhiqiang and
Smiley, Charese and
Hoste, Veronique and
Das, Sanjiv Ranjan and
Li, Manling and
Ghassemi, Mohammad and
Huang, Hen-Hsen and
Takamura, Hiroya and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.finnlp-1.13",
pages = "130--136",
abstract = "In the banking and finance sectors, members of the business units focused on Trend and Risk Analysis daily process internal and external visually-rich documents including text, images, and tables. Given a facet (i.e., topic) of interest, they are particularly interested in retrieving the top trending keywords related to it and then use them to annotate the most relevant document elements (e.g., text paragraphs, images or tables). In this paper, we explore the use of both open-source and proprietary Large Language Models to automatically generate lists of facet-relevant keywords, automatically produce free-text descriptions of both keywords and multimedia document content, and then annotate documents by leveraging textual similarity approaches. The preliminary results, achieved on English and Italian documents, show that OpenAI GPT-4 achieves superior performance in keyword description generation and multimedia content annotation, while the open-source Meta AI Llama2 model turns out to be highly competitive in generating additional keywords.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gallipoli-etal-2024-keyword">
<titleInfo>
<title>Keyword-based Annotation of Visually-Rich Document Content for Trend and Risk Analysis Using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Gallipoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Papicchio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lorenzo</namePart>
<namePart type="family">Vaiani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Cagliero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arianna</namePart>
<namePart type="family">Miola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniele</namePart>
<namePart type="family">Borghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaomo</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Armineh</namePart>
<namePart type="family">Nourbakhsh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhiqiang</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charese</namePart>
<namePart type="family">Smiley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjiv</namePart>
<namePart type="given">Ranjan</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Ghassemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hen-Hsen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the banking and finance sectors, members of the business units focused on Trend and Risk Analysis daily process internal and external visually-rich documents including text, images, and tables. Given a facet (i.e., topic) of interest, they are particularly interested in retrieving the top trending keywords related to it and then use them to annotate the most relevant document elements (e.g., text paragraphs, images or tables). In this paper, we explore the use of both open-source and proprietary Large Language Models to automatically generate lists of facet-relevant keywords, automatically produce free-text descriptions of both keywords and multimedia document content, and then annotate documents by leveraging textual similarity approaches. The preliminary results, achieved on English and Italian documents, show that OpenAI GPT-4 achieves superior performance in keyword description generation and multimedia content annotation, while the open-source Meta AI Llama2 model turns out to be highly competitive in generating additional keywords.</abstract>
<identifier type="citekey">gallipoli-etal-2024-keyword</identifier>
<location>
<url>https://aclanthology.org/2024.finnlp-1.13</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>130</start>
<end>136</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Keyword-based Annotation of Visually-Rich Document Content for Trend and Risk Analysis Using Large Language Models
%A Gallipoli, Giuseppe
%A Papicchio, Simone
%A Vaiani, Lorenzo
%A Cagliero, Luca
%A Miola, Arianna
%A Borghi, Daniele
%Y Chen, Chung-Chi
%Y Liu, Xiaomo
%Y Hahn, Udo
%Y Nourbakhsh, Armineh
%Y Ma, Zhiqiang
%Y Smiley, Charese
%Y Hoste, Veronique
%Y Das, Sanjiv Ranjan
%Y Li, Manling
%Y Ghassemi, Mohammad
%Y Huang, Hen-Hsen
%Y Takamura, Hiroya
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing
%D 2024
%8 May
%I Association for Computational Linguistics
%C Torino, Italia
%F gallipoli-etal-2024-keyword
%X In the banking and finance sectors, members of the business units focused on Trend and Risk Analysis daily process internal and external visually-rich documents including text, images, and tables. Given a facet (i.e., topic) of interest, they are particularly interested in retrieving the top trending keywords related to it and then use them to annotate the most relevant document elements (e.g., text paragraphs, images or tables). In this paper, we explore the use of both open-source and proprietary Large Language Models to automatically generate lists of facet-relevant keywords, automatically produce free-text descriptions of both keywords and multimedia document content, and then annotate documents by leveraging textual similarity approaches. The preliminary results, achieved on English and Italian documents, show that OpenAI GPT-4 achieves superior performance in keyword description generation and multimedia content annotation, while the open-source Meta AI Llama2 model turns out to be highly competitive in generating additional keywords.
%U https://aclanthology.org/2024.finnlp-1.13
%P 130-136
Markdown (Informal)
[Keyword-based Annotation of Visually-Rich Document Content for Trend and Risk Analysis Using Large Language Models](https://aclanthology.org/2024.finnlp-1.13) (Gallipoli et al., FinNLP 2024)
ACL
- Giuseppe Gallipoli, Simone Papicchio, Lorenzo Vaiani, Luca Cagliero, Arianna Miola, and Daniele Borghi. 2024. Keyword-based Annotation of Visually-Rich Document Content for Trend and Risk Analysis Using Large Language Models. In Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing, pages 130–136, Torino, Italia. Association for Computational Linguistics.