@inproceedings{graziuso-etal-2024-task,
    title = "Task-Incremental Learning on Long Text Sequences",
    author = "Graziuso, Natalia  and
      Zugarini, Andrea  and
      Melacci, Stefano",
    editor = "Dell'Orletta, Felice  and
      Lenci, Alessandro  and
      Montemagni, Simonetta  and
      Sprugnoli, Rachele",
    booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
    month = dec,
    year = "2024",
    address = "Pisa, Italy",
    publisher = "CEUR Workshop Proceedings",
    url = "https://aclanthology.org/2024.clicit-1.49/",
    pages = "410--416",
    ISBN = "979-12-210-7060-6",
    abstract = "The extraordinary results achieved by Large Language Models are paired with issues that are critical in real-world applications. The costs of inference and, in particular, training are extremely large, both in terms of time and computational resources, and they become prohibitive when working in dynamic environments, where data and tasks are progressively provided over time. The model must be able to adapt to new knowledge, new domains, and new settings without forgetting previously learned skills. Retraining from scratch easily becomes too costly; Continual Learning strategies are therefore of crucial importance. This is even more evident when the data consist of {\textquotedblleft}long{\textquotedblright} documents, which require considerable resources to be processed by modern neural models, leading to very long prompts. This paper investigates LLM-based Task-Incremental Learning for tasks that exploit long sequences of text, as is typical in summarization, question answering on long documents, reviewing long contracts, and several other applications. We show that adapting the model via Task Arithmetic with LoRA, originally proposed for visual data, also yields promising results on such {\textquotedblleft}long{\textquotedblright} text data. To the best of our knowledge, this is the first work in this challenging direction. The outcome of this paper's investigation is generic enough to represent an important starting point for further research on processing linguistic data in any language."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="graziuso-etal-2024-task">
    <titleInfo>
      <title>Task-Incremental Learning on Long Text Sequences</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Natalia</namePart>
      <namePart type="family">Graziuso</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrea</namePart>
      <namePart type="family">Zugarini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefano</namePart>
      <namePart type="family">Melacci</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Felice</namePart>
        <namePart type="family">Dell’Orletta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simonetta</namePart>
        <namePart type="family">Montemagni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>CEUR Workshop Proceedings</publisher>
        <place>
          <placeTerm type="text">Pisa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-12-210-7060-6</identifier>
    </relatedItem>
    <abstract>The extraordinary results achieved by Large Language Models are paired with issues that are critical in real-world applications. The costs of inference and, in particular, training are extremely large, both in terms of time and computational resources, and they become prohibitive when working in dynamic environments, where data and tasks are progressively provided over time. The model must be able to adapt to new knowledge, new domains, and new settings without forgetting previously learned skills. Retraining from scratch easily becomes too costly; Continual Learning strategies are therefore of crucial importance. This is even more evident when the data consist of “long” documents, which require considerable resources to be processed by modern neural models, leading to very long prompts. This paper investigates LLM-based Task-Incremental Learning for tasks that exploit long sequences of text, as is typical in summarization, question answering on long documents, reviewing long contracts, and several other applications. We show that adapting the model via Task Arithmetic with LoRA, originally proposed for visual data, also yields promising results on such “long” text data. To the best of our knowledge, this is the first work in this challenging direction. The outcome of this paper’s investigation is generic enough to represent an important starting point for further research on processing linguistic data in any language.</abstract>
    <identifier type="citekey">graziuso-etal-2024-task</identifier>
    <location>
      <url>https://aclanthology.org/2024.clicit-1.49/</url>
    </location>
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>410</start>
        <end>416</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Task-Incremental Learning on Long Text Sequences
%A Graziuso, Natalia
%A Zugarini, Andrea
%A Melacci, Stefano
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F graziuso-etal-2024-task
%X The extraordinary results achieved by Large Language Models are paired with issues that are critical in real-world applications. The costs of inference and, in particular, training are extremely large, both in terms of time and computational resources, and they become prohibitive when working in dynamic environments, where data and tasks are progressively provided over time. The model must be able to adapt to new knowledge, new domains, and new settings without forgetting previously learned skills. Retraining from scratch easily becomes too costly; Continual Learning strategies are therefore of crucial importance. This is even more evident when the data consist of “long” documents, which require considerable resources to be processed by modern neural models, leading to very long prompts. This paper investigates LLM-based Task-Incremental Learning for tasks that exploit long sequences of text, as is typical in summarization, question answering on long documents, reviewing long contracts, and several other applications. We show that adapting the model via Task Arithmetic with LoRA, originally proposed for visual data, also yields promising results on such “long” text data. To the best of our knowledge, this is the first work in this challenging direction. The outcome of this paper’s investigation is generic enough to represent an important starting point for further research on processing linguistic data in any language.
%U https://aclanthology.org/2024.clicit-1.49/
%P 410-416
Markdown (Informal):
[Task-Incremental Learning on Long Text Sequences](https://aclanthology.org/2024.clicit-1.49/) (Graziuso et al., CLiC-it 2024)

ACL:
Natalia Graziuso, Andrea Zugarini, and Stefano Melacci. 2024. Task-Incremental Learning on Long Text Sequences. In Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), pages 410–416, Pisa, Italy. CEUR Workshop Proceedings.
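For context on the method named in the abstract: below is a minimal sketch of Task Arithmetic combined with LoRA in its standard formulation; the symbols and scaling coefficients are illustrative conventions, not notation taken from the paper. Fine-tuning the base model on a task with a LoRA adapter produces a low-rank weight delta, which plays the role of the task vector; task-incremental adaptation then adds the scaled task vectors to the frozen base weights.

\documentclass{article}
\usepackage{amsmath}
\usepackage{amssymb}
\begin{document}
% Task vector for task t: under LoRA, the fine-tuning delta is the
% low-rank product of the two adapter matrices (illustrative notation).
\[
  \tau_t = \Delta\theta_t = B_t A_t,
  \qquad B_t \in \mathbb{R}^{d \times r},\quad
  A_t \in \mathbb{R}^{r \times k},\quad
  r \ll \min(d, k)
\]
% Task-incremental model after T tasks: frozen base weights plus the
% scaled sum of the task vectors learned so far.
\[
  \theta_T = \theta_0 + \sum_{t=1}^{T} \lambda_t \, \tau_t
\]
\end{document}

Since each adapter stores only $B_t$ and $A_t$, adding, removing, or re-weighting a task reduces to arithmetic on its $\tau_t$, with no retraining of $\theta_0$ from scratch.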