@inproceedings{kuramoto-suzuki-2025-predicting,
title = "Predicting Fine-tuned Performance on Larger Datasets Before Creating Them",
author = "Kuramoto, Toshiki and
Suzuki, Jun",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven and
Darwish, Kareem and
Agarwal, Apoorv",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics: Industry Track",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-industry.17/",
pages = "204--212",
abstract = "This paper proposes a method to estimate the performance of pretrained models fine-tuned with a larger dataset from the result with a smaller dataset. Specifically, we demonstrate that when a pretrained model is fine-tuned, its classification performance increases at the same overall rate, regardless of the original dataset size, as the number of epochs increases. Subsequently, we verify that an approximate formula based on this trend can be used to predict the performance when the model is trained with ten times or more training data, even when the initial training dataset is limited. Our results show that this approach can help resource-limited companies develop machine-learning models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kuramoto-suzuki-2025-predicting">
<titleInfo>
<title>Predicting Fine-tuned Performance on Larger Datasets Before Creating Them</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiki</namePart>
<namePart type="family">Kuramoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Apoorv</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper proposes a method to estimate the performance of a pretrained model fine-tuned on a larger dataset from the results obtained with a smaller dataset. Specifically, we demonstrate that when a pretrained model is fine-tuned, its classification performance increases at the same overall rate as the number of epochs increases, regardless of the original dataset size. Subsequently, we verify that an approximate formula based on this trend can predict performance when the model is trained with ten times or more training data, even when the initial training dataset is limited. Our results show that this approach can help resource-limited companies develop machine-learning models.</abstract>
<identifier type="citekey">kuramoto-suzuki-2025-predicting</identifier>
<location>
<url>https://aclanthology.org/2025.coling-industry.17/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>204</start>
<end>212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting Fine-tuned Performance on Larger Datasets Before Creating Them
%A Kuramoto, Toshiki
%A Suzuki, Jun
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%Y Darwish, Kareem
%Y Agarwal, Apoorv
%S Proceedings of the 31st International Conference on Computational Linguistics: Industry Track
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F kuramoto-suzuki-2025-predicting
%X This paper proposes a method to estimate the performance of a pretrained model fine-tuned on a larger dataset from the results obtained with a smaller dataset. Specifically, we demonstrate that when a pretrained model is fine-tuned, its classification performance increases at the same overall rate as the number of epochs increases, regardless of the original dataset size. Subsequently, we verify that an approximate formula based on this trend can predict performance when the model is trained with ten times or more training data, even when the initial training dataset is limited. Our results show that this approach can help resource-limited companies develop machine-learning models.
%U https://aclanthology.org/2025.coling-industry.17/
%P 204-212
Markdown (Informal)
[Predicting Fine-tuned Performance on Larger Datasets Before Creating Them](https://aclanthology.org/2025.coling-industry.17/) (Kuramoto & Suzuki, COLING 2025)
ACL
Toshiki Kuramoto and Jun Suzuki. 2025. Predicting Fine-tuned Performance on Larger Datasets Before Creating Them. In Proceedings of the 31st International Conference on Computational Linguistics: Industry Track, pages 204–212, Abu Dhabi, UAE. Association for Computational Linguistics.
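The abstract describes fitting an approximate formula to fine-tuning results on a small dataset and using it to predict performance when ten times or more training data is available. The formula itself is not reproduced in this record, so the Python sketch below is a hypothetical stand-in rather than the authors' method: it fits a generic saturating curve to synthetic accuracy measurements and extrapolates, only to illustrate the kind of small-to-large extrapolation the abstract describes. The function `saturating`, the data points, and all fitted parameters are assumptions.

```python
# Illustrative sketch only -- the paper's approximate formula is not included
# in this record, so a generic saturating curve stands in for it, and all
# numbers below are synthetic.
import numpy as np
from scipy.optimize import curve_fit

def saturating(n, a, b, c):
    """Hypothetical performance curve: accuracy approaches `a` as data grows."""
    return a - b / np.power(n, c)

# Synthetic dev-set accuracies after fine-tuning on small training sets
sizes = np.array([500.0, 1000.0, 2000.0, 4000.0])  # training-set sizes
accs = np.array([0.71, 0.76, 0.80, 0.83])          # observed accuracies

# Fit the curve to the small-dataset results
params, _ = curve_fit(saturating, sizes, accs, p0=[0.9, 1.0, 0.5], maxfev=10000)

# Extrapolate to ten times the largest measured dataset, in the spirit of
# the prediction the abstract describes
print(f"Predicted accuracy at 40000 examples: {saturating(40000.0, *params):.3f}")
```

A saturating form is chosen here only because classification accuracy is bounded above; the paper may well use a different functional form, and the abstract's epoch-wise trend (the same overall improvement rate regardless of dataset size) is what would justify fitting such a curve from limited data in the first place.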