@inproceedings{mahmoud-nakov-2023-bertastic,
title = "{BERT}astic at {S}em{E}val-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?",
author = "Mahmoud, Tarek and
Nakov, Preslav",
editor = {Ojha, Atul Kr. and
Do{\u{g}}ru{\"o}z, A. Seza and
Da San Martino, Giovanni and
Tayyar Madabushi, Harish and
Kumar, Ritesh and
Sartori, Elisa},
booktitle = "Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.semeval-1.7",
doi = "10.18653/v1/2023.semeval-1.7",
pages = "58--63",
abstract = {The naive approach for fine-tuning pretrained deep learning models on downstream tasks involves feeding them mini-batches of randomly sampled data. In this paper, we propose a more elaborate method for fine-tuning Pretrained Multilingual Transformers (PMTs) on multilingual data. Inspired by the success of curriculum learning approaches, we investigate the significance of fine-tuning PMTs on multilingual data in a sequential fashion language by language. Unlike the curriculum learning paradigm where the model is presented with increasingly complex examples, we do not adopt a notion of {``}easy{''} and {``}hard{''} samples. Instead, our experiments draw insight from psychological findings on how the human brain processes new information and the persistence of newly learned concepts. We perform our experiments on a challenging news-framing dataset that contains texts in six languages. Our proposed method outperforms the na{\"\i}ve approach by achieving improvements of 2.57{\%} in terms of F1 score. Even when we supplement the na{\"\i}ve approach with recency fine-tuning, we still achieve an improvement of 1.34{\%} with a 3.63{\%} convergence speed-up. Moreover, we are the first to observe an interesting pattern in which deep learning models exhibit a human-like primacy-recency effect.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahmoud-nakov-2023-bertastic">
<titleInfo>
<title>BERTastic at SemEval-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tarek</namePart>
<namePart type="family">Mahmoud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Da San Martino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ritesh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sartori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The naive approach for fine-tuning pretrained deep learning models on downstream tasks involves feeding them mini-batches of randomly sampled data. In this paper, we propose a more elaborate method for fine-tuning Pretrained Multilingual Transformers (PMTs) on multilingual data. Inspired by the success of curriculum learning approaches, we investigate the significance of fine-tuning PMTs on multilingual data in a sequential fashion language by language. Unlike the curriculum learning paradigm where the model is presented with increasingly complex examples, we do not adopt a notion of “easy” and “hard” samples. Instead, our experiments draw insight from psychological findings on how the human brain processes new information and the persistence of newly learned concepts. We perform our experiments on a challenging news-framing dataset that contains texts in six languages. Our proposed method outperforms the naïve approach by achieving improvements of 2.57% in terms of F1 score. Even when we supplement the naïve approach with recency fine-tuning, we still achieve an improvement of 1.34% with a 3.63% convergence speed-up. Moreover, we are the first to observe an interesting pattern in which deep learning models exhibit a human-like primacy-recency effect.</abstract>
<identifier type="citekey">mahmoud-nakov-2023-bertastic</identifier>
<identifier type="doi">10.18653/v1/2023.semeval-1.7</identifier>
<location>
<url>https://aclanthology.org/2023.semeval-1.7</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>58</start>
<end>63</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BERTastic at SemEval-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?
%A Mahmoud, Tarek
%A Nakov, Preslav
%Y Ojha, Atul Kr.
%Y Doğruöz, A. Seza
%Y Da San Martino, Giovanni
%Y Tayyar Madabushi, Harish
%Y Kumar, Ritesh
%Y Sartori, Elisa
%S Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F mahmoud-nakov-2023-bertastic
%X The naive approach for fine-tuning pretrained deep learning models on downstream tasks involves feeding them mini-batches of randomly sampled data. In this paper, we propose a more elaborate method for fine-tuning Pretrained Multilingual Transformers (PMTs) on multilingual data. Inspired by the success of curriculum learning approaches, we investigate the significance of fine-tuning PMTs on multilingual data in a sequential fashion language by language. Unlike the curriculum learning paradigm where the model is presented with increasingly complex examples, we do not adopt a notion of “easy” and “hard” samples. Instead, our experiments draw insight from psychological findings on how the human brain processes new information and the persistence of newly learned concepts. We perform our experiments on a challenging news-framing dataset that contains texts in six languages. Our proposed method outperforms the naïve approach by achieving improvements of 2.57% in terms of F1 score. Even when we supplement the naïve approach with recency fine-tuning, we still achieve an improvement of 1.34% with a 3.63% convergence speed-up. Moreover, we are the first to observe an interesting pattern in which deep learning models exhibit a human-like primacy-recency effect.
%R 10.18653/v1/2023.semeval-1.7
%U https://aclanthology.org/2023.semeval-1.7
%U https://doi.org/10.18653/v1/2023.semeval-1.7
%P 58-63
Markdown (Informal)
[BERTastic at SemEval-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?](https://aclanthology.org/2023.semeval-1.7) (Mahmoud & Nakov, SemEval 2023)
ACL
Tarek Mahmoud and Preslav Nakov. 2023. [BERTastic at SemEval-2023 Task 3: Fine-Tuning Pretrained Multilingual Transformers Does Order Matter?](https://aclanthology.org/2023.semeval-1.7). In *Proceedings of the 17th International Workshop on Semantic Evaluation (SemEval-2023)*, pages 58–63, Toronto, Canada. Association for Computational Linguistics.
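
The abstract above contrasts random mini-batch fine-tuning with fine-tuning a pretrained multilingual transformer one language at a time. As a rough illustration only (not the authors' code), the sketch below shows the two orderings; `model`, `train_step`, the per-language example lists, and the epochs-per-language schedule are placeholder assumptions, since the paper's exact training schedule is not given here.

```python
# Minimal sketch (assumptions, not the authors' implementation) contrasting
# random mini-batch fine-tuning with sequential, language-by-language
# fine-tuning. `train_step` and the example collections are placeholders.
import random


def batches(examples, size=16):
    """Yield consecutive mini-batches of `size` examples."""
    for i in range(0, len(examples), size):
        yield examples[i:i + size]


def naive_finetune(model, examples_by_language, train_step, epochs=3):
    """Baseline: pool all languages and train on randomly shuffled batches."""
    pooled = [ex for exs in examples_by_language.values() for ex in exs]
    for _ in range(epochs):
        random.shuffle(pooled)
        for batch in batches(pooled):
            train_step(model, batch)


def sequential_finetune(model, examples_by_language, language_order,
                        train_step, epochs_per_language=3):
    """Sequential order: fine-tune on one language at a time, in a fixed
    sequence. The abstract's primacy/recency observation suggests a language's
    position in `language_order` affects how well it is retained."""
    for lang in language_order:
        for _ in range(epochs_per_language):
            for batch in batches(examples_by_language[lang]):
                train_step(model, batch)
```

In this sketch the language order and per-language epoch count are arbitrary choices; the abstract's claim is precisely that such ordering decisions measurably affect F1 and convergence speed.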