% Workshop paper from the ACL Anthology (Insights from Negative Results in NLP, 2024).
% Braced words in title/booktitle keep their capitalisation under styles that
% sentence-case titles; all other field values are stored in canonical form.
@inproceedings{sanchez-carmona-etal-2024-well,
  title     = {How Well Can a Genetic Algorithm Fine-tune {Transformer} Encoders? {A} First Approach},
  author    = {Sanchez Carmona, Vicente Ivan and
               Jiang, Shanshan and
               Dong, Bin},
  editor    = {Tafreshi, Shabnam and
               Akula, Arjun and
               Sedoc, Jo{\~a}o and
               Drozd, Aleksandr and
               Rogers, Anna and
               Rumshisky, Anna},
  booktitle = {Proceedings of the Fifth Workshop on Insights from Negative Results in {NLP}},
  month     = jun,
  year      = {2024},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.insights-1.4},
  doi       = {10.18653/v1/2024.insights-1.4},
  pages     = {25--33},
  abstract  = {Genetic Algorithms (GAs) have been studied across different fields such as engineering or medicine to optimize diverse problems such as network routing, or medical image segmentation. Moreover, they have been used to automatically find optimal architectures for deep neural networks. However, to our knowledge, they have not been applied as a weight optimizer for the Transformer model. While gradient descent has been the main paradigm for this task, we believe that GAs have advantages to bring to the table. In this paper, we will show that even though GAs are capable of fine-tuning Transformer encoders, their generalization ability is considerably poorer than that from Adam; however, on a closer look, GAs ability to exploit knowledge from 2 different pretraining datasets surpasses Adam{'}s ability to do so.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey sanchez-carmona-etal-2024-well -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sanchez-carmona-etal-2024-well">
    <!-- Paper title -->
    <titleInfo>
      <title>How Well Can a Genetic Algorithm Fine-tune Transformer Encoders? A First Approach</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Vicente</namePart>
      <namePart type="given">Ivan</namePart>
      <namePart type="family">Sanchez Carmona</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shanshan</namePart>
      <namePart type="family">Jiang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bin</namePart>
      <namePart type="family">Dong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fifth Workshop on Insights from Negative Results in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Shabnam</namePart>
        <namePart type="family">Tafreshi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Arjun</namePart>
        <namePart type="family">Akula</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">João</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aleksandr</namePart>
        <namePart type="family">Drozd</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rumshisky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Genetic Algorithms (GAs) have been studied across different fields such as engineering or medicine to optimize diverse problems such as network routing, or medical image segmentation. Moreover, they have been used to automatically find optimal architectures for deep neural networks. However, to our knowledge, they have not been applied as a weight optimizer for the Transformer model. While gradient descent has been the main paradigm for this task, we believe that GAs have advantages to bring to the table. In this paper, we will show that even though GAs are capable of fine-tuning Transformer encoders, their generalization ability is considerably poorer than that from Adam; however, on a closer look, GAs ability to exploit knowledge from 2 different pretraining datasets surpasses Adam’s ability to do so.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">sanchez-carmona-etal-2024-well</identifier>
    <identifier type="doi">10.18653/v1/2024.insights-1.4</identifier>
    <location>
      <url>https://aclanthology.org/2024.insights-1.4</url>
    </location>
    <!-- Date and page extent recorded for this part -->
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>25</start>
        <end>33</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How Well Can a Genetic Algorithm Fine-tune Transformer Encoders? A First Approach
%A Sanchez Carmona, Vicente Ivan
%A Jiang, Shanshan
%A Dong, Bin
%Y Tafreshi, Shabnam
%Y Akula, Arjun
%Y Sedoc, João
%Y Drozd, Aleksandr
%Y Rogers, Anna
%Y Rumshisky, Anna
%S Proceedings of the Fifth Workshop on Insights from Negative Results in NLP
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F sanchez-carmona-etal-2024-well
%X Genetic Algorithms (GAs) have been studied across different fields such as engineering or medicine to optimize diverse problems such as network routing, or medical image segmentation. Moreover, they have been used to automatically find optimal architectures for deep neural networks. However, to our knowledge, they have not been applied as a weight optimizer for the Transformer model. While gradient descent has been the main paradigm for this task, we believe that GAs have advantages to bring to the table. In this paper, we will show that even though GAs are capable of fine-tuning Transformer encoders, their generalization ability is considerably poorer than that from Adam; however, on a closer look, GAs ability to exploit knowledge from 2 different pretraining datasets surpasses Adam’s ability to do so.
%R 10.18653/v1/2024.insights-1.4
%U https://aclanthology.org/2024.insights-1.4
%U https://doi.org/10.18653/v1/2024.insights-1.4
%P 25-33
Markdown (Informal)
[How Well Can a Genetic Algorithm Fine-tune Transformer Encoders? A First Approach](https://aclanthology.org/2024.insights-1.4) (Sanchez Carmona et al., insights-WS 2024)
ACL