@inproceedings{lee-park-2025-dunamu,
title = "Dunamu {ML} at the Financial Misinformation Detection Challenge Task: Improving Supervised Fine-Tuning with {LLM}-based Data Augmentation",
author = "Lee, Dongjun and
Park, Heesoo",
editor = "Chen, Chung-Chi and
Moreno-Sandoval, Antonio and
Huang, Jimin and
Xie, Qianqian and
Ananiadou, Sophia and
Chen, Hsin-Hsi",
booktitle = "Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.finnlp-1.34/",
pages = "297--301",
abstract = "In this paper, we describe Dunamu ML`s submission to the Financial Misinformation Detection (FMD) 2025 shared task. To address the low-resource challenge in FMD, we augmented a general domain misinformation detection dataset for training. We first collected claims, contexts, and misinformation labels from a public dataset. Then, we generated evidence for each label based on a closed LLM with few-shot examples extracted from the FMD training dataset. Finally, we oversampled the training data specific to the financial domain and augmented it with the generated data to perform supervised fine-tuning (SFT) on the LLM. When evaluated on the blind test dataset, our model achieved an F1 score of 84.67 in misinformation classification and a ROUGE-1 score of 81.21 in evidence generation, ranking first on the leaderboard in both aspects."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-park-2025-dunamu">
<titleInfo>
<title>Dunamu ML at the Financial Misinformation Detection Challenge Task: Improving Supervised Fine-Tuning with LLM-based Data Augmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dongjun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heesoo</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chung-Chi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Moreno-Sandoval</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimin</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qianqian</namePart>
<namePart type="family">Xie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe Dunamu ML‘s submission to the Financial Misinformation Detection (FMD) 2025 shared task. To address the low-resource challenge in FMD, we augmented a general domain misinformation detection dataset for training. We first collected claims, contexts, and misinformation labels from a public dataset. Then, we generated evidence for each label based on a closed LLM with few-shot examples extracted from the FMD training dataset. Finally, we oversampled the training data specific to the financial domain and augmented it with the generated data to perform supervised fine-tuning (SFT) on the LLM. When evaluated on the blind test dataset, our model achieved an F1 score of 84.67 in misinformation classification and a ROUGE-1 score of 81.21 in evidence generation, ranking first on the leaderboard in both aspects.</abstract>
<identifier type="citekey">lee-park-2025-dunamu</identifier>
<location>
<url>https://aclanthology.org/2025.finnlp-1.34/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>297</start>
<end>301</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dunamu ML at the Financial Misinformation Detection Challenge Task: Improving Supervised Fine-Tuning with LLM-based Data Augmentation
%A Lee, Dongjun
%A Park, Heesoo
%Y Chen, Chung-Chi
%Y Moreno-Sandoval, Antonio
%Y Huang, Jimin
%Y Xie, Qianqian
%Y Ananiadou, Sophia
%Y Chen, Hsin-Hsi
%S Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F lee-park-2025-dunamu
%X In this paper, we describe Dunamu ML's submission to the Financial Misinformation Detection (FMD) 2025 shared task. To address the low-resource challenge in FMD, we augmented a general domain misinformation detection dataset for training. We first collected claims, contexts, and misinformation labels from a public dataset. Then, we generated evidence for each label based on a closed LLM with few-shot examples extracted from the FMD training dataset. Finally, we oversampled the training data specific to the financial domain and augmented it with the generated data to perform supervised fine-tuning (SFT) on the LLM. When evaluated on the blind test dataset, our model achieved an F1 score of 84.67 in misinformation classification and a ROUGE-1 score of 81.21 in evidence generation, ranking first on the leaderboard in both aspects.
%U https://aclanthology.org/2025.finnlp-1.34/
%P 297-301
Markdown (Informal):
[Dunamu ML at the Financial Misinformation Detection Challenge Task: Improving Supervised Fine-Tuning with LLM-based Data Augmentation](https://aclanthology.org/2025.finnlp-1.34/) (Lee & Park, FinNLP 2025)

ACL:
Dongjun Lee and Heesoo Park. 2025. [Dunamu ML at the Financial Misinformation Detection Challenge Task: Improving Supervised Fine-Tuning with LLM-based Data Augmentation](https://aclanthology.org/2025.finnlp-1.34/). In *Proceedings of the Joint Workshop of the 9th Financial Technology and Natural Language Processing (FinNLP), the 6th Financial Narrative Processing (FNP), and the 1st Workshop on Large Language Models for Finance and Legal (LLMFinLegal)*, pages 297–301, Abu Dhabi, UAE. Association for Computational Linguistics.