@inproceedings{tortoreto-mousavi-2024-dolomites,
title = "Dolomites@{\#}{SMM}4{H} 2024: Helping {LLM}s {``}Know The Drill{''} in Low-Resource Settings - A Study on Social Media Posts",
author = "Tortoreto, Giuliano and
Mousavi, Seyed Mahed",
editor = "Xu, Dongfang and
Gonzalez-Hernandez, Graciela",
booktitle = "Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.smm4h-1.5",
pages = "17--22",
abstract = "The amount of data to fine-tune LLMs plays a crucial role in the performance of these models in downstream tasks. Consequently, it is not straightforward to deploy these models in low-resource settings. In this work, we investigate two new multi-task learning data augmentation approaches for fine-tuning LLMs when little data is available: {``}In-domain Augmentation{''} of the training data and extracting {``}Drills{''} as smaller tasks from the target dataset. We evaluate the proposed approaches in three natural language processing settings in the context of SMM4H 2024 competition tasks: multi-class classification, entity recognition, and information extraction. The results show that both techniques improve the performance of the models in all three settings, suggesting a positive impact from the knowledge learned in multi-task training to perform the target task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tortoreto-mousavi-2024-dolomites">
<titleInfo>
<title>Dolomites@#SMM4H 2024: Helping LLMs “Know The Drill” in Low-Resource Settings - A Study on Social Media Posts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giuliano</namePart>
<namePart type="family">Tortoreto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyed</namePart>
<namePart type="given">Mahed</namePart>
<namePart type="family">Mousavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dongfang</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The amount of data to fine-tune LLMs plays a crucial role in the performance of these models in downstream tasks. Consequently, it is not straightforward to deploy these models in low-resource settings. In this work, we investigate two new multi-task learning data augmentation approaches for fine-tuning LLMs when little data is available: “In-domain Augmentation” of the training data and extracting “Drills” as smaller tasks from the target dataset. We evaluate the proposed approaches in three natural language processing settings in the context of SMM4H 2024 competition tasks: multi-class classification, entity recognition, and information extraction. The results show that both techniques improve the performance of the models in all three settings, suggesting a positive impact from the knowledge learned in multi-task training to perform the target task.</abstract>
<identifier type="citekey">tortoreto-mousavi-2024-dolomites</identifier>
<location>
<url>https://aclanthology.org/2024.smm4h-1.5</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>17</start>
<end>22</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dolomites@#SMM4H 2024: Helping LLMs “Know The Drill” in Low-Resource Settings - A Study on Social Media Posts
%A Tortoreto, Giuliano
%A Mousavi, Seyed Mahed
%Y Xu, Dongfang
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of The 9th Social Media Mining for Health Research and Applications (SMM4H 2024) Workshop and Shared Tasks
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F tortoreto-mousavi-2024-dolomites
%X The amount of data to fine-tune LLMs plays a crucial role in the performance of these models in downstream tasks. Consequently, it is not straightforward to deploy these models in low-resource settings. In this work, we investigate two new multi-task learning data augmentation approaches for fine-tuning LLMs when little data is available: “In-domain Augmentation” of the training data and extracting “Drills” as smaller tasks from the target dataset. We evaluate the proposed approaches in three natural language processing settings in the context of SMM4H 2024 competition tasks: multi-class classification, entity recognition, and information extraction. The results show that both techniques improve the performance of the models in all three settings, suggesting a positive impact from the knowledge learned in multi-task training to perform the target task.
%U https://aclanthology.org/2024.smm4h-1.5
%P 17-22
[Dolomites@#SMM4H 2024: Helping LLMs “Know The Drill” in Low-Resource Settings - A Study on Social Media Posts](https://aclanthology.org/2024.smm4h-1.5) (Tortoreto & Mousavi, SMM4H-WS 2024)