@inproceedings{safikhani-broneske-2025-automl,
title = "{A}uto{ML} Meets Hugging Face: Domain-Aware Pretrained Model Selection for Text Classification",
author = "Safikhani, Parisa and
Broneske, David",
editor = "Ebrahimi, Abteen and
Haider, Samar and
Liu, Emmy and
Haider, Sammar and
Leonor Pacheco, Maria and
Wein, Shira",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)",
month = apr,
year = "2025",
address = "Albuquerque, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-srw.45/",
doi = "10.18653/v1/2025.naacl-srw.45",
pages = "466--473",
isbn = "979-8-89176-192-6",
abstract = "The effectiveness of embedding methods is crucial for optimizing text classification performance in Automated Machine Learning (AutoML). However, selecting the most suitable pre-trained model for a given task remains challenging. This study introduces the Corpus-Driven Domain Mapping (CDDM) pipeline, which utilizes a domain-annotated corpus of pre-fine-tuned models from the Hugging Face Model Hub to improve model selection. Integrating these models into AutoML systems significantly boosts classification performance across multiple datasets compared to baseline methods. Despite some domain recognition inaccuracies, results demonstrate CDDM{'}s potential to enhance model selection, streamline AutoML workflows, and reduce computational costs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="safikhani-broneske-2025-automl">
<titleInfo>
<title>AutoML Meets Hugging Face: Domain-Aware Pretrained Model Selection for Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Parisa</namePart>
<namePart type="family">Safikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Broneske</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samar</namePart>
<namePart type="family">Haider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmy</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sammar</namePart>
<namePart type="family">Haider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Leonor Pacheco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shira</namePart>
<namePart type="family">Wein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-192-6</identifier>
</relatedItem>
<abstract>The effectiveness of embedding methods is crucial for optimizing text classification performance in Automated Machine Learning (AutoML). However, selecting the most suitable pre-trained model for a given task remains challenging. This study introduces the Corpus-Driven Domain Mapping (CDDM) pipeline, which utilizes a domain-annotated corpus of pre-fine-tuned models from the Hugging Face Model Hub to improve model selection. Integrating these models into AutoML systems significantly boosts classification performance across multiple datasets compared to baseline methods. Despite some domain recognition inaccuracies, results demonstrate CDDM’s potential to enhance model selection, streamline AutoML workflows, and reduce computational costs.</abstract>
<identifier type="citekey">safikhani-broneske-2025-automl</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-srw.45</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-srw.45/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>466</start>
<end>473</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AutoML Meets Hugging Face: Domain-Aware Pretrained Model Selection for Text Classification
%A Safikhani, Parisa
%A Broneske, David
%Y Ebrahimi, Abteen
%Y Haider, Samar
%Y Liu, Emmy
%Y Haider, Sammar
%Y Leonor Pacheco, Maria
%Y Wein, Shira
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 4: Student Research Workshop)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, USA
%@ 979-8-89176-192-6
%F safikhani-broneske-2025-automl
%X The effectiveness of embedding methods is crucial for optimizing text classification performance in Automated Machine Learning (AutoML). However, selecting the most suitable pre-trained model for a given task remains challenging. This study introduces the Corpus-Driven Domain Mapping (CDDM) pipeline, which utilizes a domain-annotated corpus of pre-fine-tuned models from the Hugging Face Model Hub to improve model selection. Integrating these models into AutoML systems significantly boosts classification performance across multiple datasets compared to baseline methods. Despite some domain recognition inaccuracies, results demonstrate CDDM’s potential to enhance model selection, streamline AutoML workflows, and reduce computational costs.
%R 10.18653/v1/2025.naacl-srw.45
%U https://aclanthology.org/2025.naacl-srw.45/
%U https://doi.org/10.18653/v1/2025.naacl-srw.45
%P 466-473
Markdown (Informal)
[AutoML Meets Hugging Face: Domain-Aware Pretrained Model Selection for Text Classification](https://aclanthology.org/2025.naacl-srw.45/) (Safikhani & Broneske, NAACL 2025)
ACL