BibTeX
@inproceedings{kuo-2024-hands,
title = "Hands-On {NLP} with Hugging Face: {ALTA} 2024 Tutorial on Efficient Fine-Tuning and Quantisation",
author = "Kuo, Nicholas I-Hsien",
editor = "Baldwin, Tim and
Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and
Kuo, Nicholas",
booktitle = "Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2024",
address = "Canberra, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.alta-1.20/",
pages = "213--213",
abstract = "This tutorial, presented at ALTA 2024, focuses on efficient fine-tuning and quantisation techniques for large language models (LLMs), addressing challenges in deploying state-of-the-art models on resource-constrained hardware. It introduces parameter-efficient fine-tuning (PEFT) methods, such as Low-Rank Adaptation (LoRA), and model quantisation strategies, which enable training and inference of LLMs on GPUs with limited memory (e.g., 16 GB VRAM). Participants will work with TinyLlama (1.1B) and the public domain text War and Peace as an accessible dataset, ensuring there are no barriers like credentialled access to Hugging Face or PhysioNet datasets. The tutorial also demonstrates common training challenges, such as OutOfMemoryError, and shows how PEFT can mitigate these issues, enabling large-scale fine-tuning even in resource-limited environments."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kuo-2024-hands">
<titleInfo>
<title>Hands-On NLP with Hugging Face: ALTA 2024 Tutorial on Efficient Fine-Tuning and Quantisation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="given">I-Hsien</namePart>
<namePart type="family">Kuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="given">José</namePart>
<namePart type="family">Rodríguez Méndez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Kuo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Canberra, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This tutorial, presented at ALTA 2024, focuses on efficient fine-tuning and quantisation techniques for large language models (LLMs), addressing challenges in deploying state-of-the-art models on resource-constrained hardware. It introduces parameter-efficient fine-tuning (PEFT) methods, such as Low-Rank Adaptation (LoRA), and model quantisation strategies, which enable training and inference of LLMs on GPUs with limited memory (e.g., 16 GB VRAM). Participants will work with TinyLlama (1.1B) and the public domain text War and Peace as an accessible dataset, ensuring there are no barriers like credentialled access to Hugging Face or PhysioNet datasets. The tutorial also demonstrates common training challenges, such as OutOfMemoryError, and shows how PEFT can mitigate these issues, enabling large-scale fine-tuning even in resource-limited environments.</abstract>
<identifier type="citekey">kuo-2024-hands</identifier>
<location>
<url>https://aclanthology.org/2024.alta-1.20/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>213</start>
<end>213</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Hands-On NLP with Hugging Face: ALTA 2024 Tutorial on Efficient Fine-Tuning and Quantisation
%A Kuo, Nicholas I-Hsien
%Y Baldwin, Tim
%Y Rodríguez Méndez, Sergio José
%Y Kuo, Nicholas
%S Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association
%D 2024
%8 December
%I Association for Computational Linguistics
%C Canberra, Australia
%F kuo-2024-hands
%X This tutorial, presented at ALTA 2024, focuses on efficient fine-tuning and quantisation techniques for large language models (LLMs), addressing challenges in deploying state-of-the-art models on resource-constrained hardware. It introduces parameter-efficient fine-tuning (PEFT) methods, such as Low-Rank Adaptation (LoRA), and model quantisation strategies, which enable training and inference of LLMs on GPUs with limited memory (e.g., 16 GB VRAM). Participants will work with TinyLlama (1.1B) and the public domain text War and Peace as an accessible dataset, ensuring there are no barriers like credentialled access to Hugging Face or PhysioNet datasets. The tutorial also demonstrates common training challenges, such as OutOfMemoryError, and shows how PEFT can mitigate these issues, enabling large-scale fine-tuning even in resource-limited environments.
%U https://aclanthology.org/2024.alta-1.20/
%P 213-213
Markdown (Informal)
[Hands-On NLP with Hugging Face: ALTA 2024 Tutorial on Efficient Fine-Tuning and Quantisation](https://aclanthology.org/2024.alta-1.20/) (Kuo, ALTA 2024)
ACL
Nicholas I-Hsien Kuo. 2024. Hands-On NLP with Hugging Face: ALTA 2024 Tutorial on Efficient Fine-Tuning and Quantisation. In Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association, pages 213–213, Canberra, Australia. Association for Computational Linguistics.
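
For readers of the abstract above, the following is a minimal, hypothetical sketch of the kind of workflow the tutorial describes: loading TinyLlama (1.1B) in 4-bit precision and attaching LoRA adapters via the Hugging Face transformers, peft, bitsandbytes, and datasets libraries, then fine-tuning on a plain-text copy of War and Peace. It is not taken from the tutorial materials; the checkpoint id, the local file name war_and_peace.txt, and all hyperparameters are illustrative assumptions.

```python
# Hypothetical sketch: LoRA fine-tuning of a 4-bit-quantised TinyLlama on a
# plain-text corpus, under the assumptions stated above.
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # assumed checkpoint id

# Load the base weights in 4-bit NF4 so the model fits comfortably in ~16 GB VRAM.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, device_map="auto"
)
model = prepare_model_for_kbit_training(model)

# LoRA: freeze the quantised base model and train only small low-rank adapters.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # typically well under 1% of all parameters

# War and Peace as a local plain-text file (assumed path), tokenised into blocks.
dataset = load_dataset("text", data_files={"train": "war_and_peace.txt"})["train"]

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=512)

tokenized = dataset.map(tokenize, batched=True, remove_columns=["text"])

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="tinyllama-lora-war-and-peace",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=50,
    ),
    train_dataset=tokenized,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

Quantising the base weights while training only the low-rank adapter matrices is what keeps the memory footprint within a 16 GB card; if an OutOfMemoryError still occurs, lowering per_device_train_batch_size or max_length is the usual first remedy.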