@inproceedings{luo-etal-2025-velocitune,
title = "Velocitune: A Velocity-based Dynamic Domain Reweighting Method for Continual Pre-training",
author = "Luo, Zheheng and
Zhang, Xin and
Liu, Xiao and
Li, Haoling and
Gong, Yeyun and
Chen, Qi and
Cheng, Peng",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.813/",
doi = "10.18653/v1/2025.acl-long.813",
pages = "16644--16656",
ISBN = "979-8-89176-251-0",
abstract = "It is well-known that a diverse corpus is critical for training large language models, which are typically constructed from a mixture of various domains. In general, previous efforts resort to either sampling training data from different domains with static proportions or dynamically adjusting these proportions during training to optimise pretraining performance. However, few methods addressed the complexities of domain-adaptive continual pre-training. To fill this gap, we propose Velocitune, a novel framework that dynamically assesses learning velocity and adjusts data proportions accordingly, favouring slower learning domains while de-emphasising faster learning ones, which is guided by a scaling law to estimate the desired learning goal for each domain with a less associated cost. To evaluate the effectiveness of Velocitune, we conduct experiments on a dataset focused on reasoning tasks with CodeLlama, as well as on a corpus of system commands using Llama3 and Mistral. Velocitune achieves performance gains in both math and code reasoning tasks and command-line generation benchmarks. Further analysis reveals that key factors driving Velocitune{'}s effectiveness include target estimation and data ordering."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2025-velocitune">
<titleInfo>
<title>Velocitune: A Velocity-based Dynamic Domain Reweighting Method for Continual Pre-training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zheheng</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiao</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yeyun</namePart>
<namePart type="family">Gong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peng</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>It is well-known that a diverse corpus is critical for training large language models, which are typically constructed from a mixture of various domains. In general, previous efforts resort to either sampling training data from different domains with static proportions or dynamically adjusting these proportions during training to optimise pretraining performance. However, few methods addressed the complexities of domain-adaptive continual pre-training. To fill this gap, we propose Velocitune, a novel framework that dynamically assesses learning velocity and adjusts data proportions accordingly, favouring slower learning domains while de-emphasising faster learning ones, which is guided by a scaling law to estimate the desired learning goal for each domain with a less associated cost. To evaluate the effectiveness of Velocitune, we conduct experiments on a dataset focused on reasoning tasks with CodeLlama, as well as on a corpus of system commands using Llama3 and Mistral. Velocitune achieves performance gains in both math and code reasoning tasks and command-line generation benchmarks. Further analysis reveals that key factors driving Velocitune’s effectiveness include target estimation and data ordering.</abstract>
<identifier type="citekey">luo-etal-2025-velocitune</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.813</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.813/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>16644</start>
<end>16656</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Velocitune: A Velocity-based Dynamic Domain Reweighting Method for Continual Pre-training
%A Luo, Zheheng
%A Zhang, Xin
%A Liu, Xiao
%A Li, Haoling
%A Gong, Yeyun
%A Chen, Qi
%A Cheng, Peng
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F luo-etal-2025-velocitune
%X It is well-known that a diverse corpus is critical for training large language models, which are typically constructed from a mixture of various domains. In general, previous efforts resort to either sampling training data from different domains with static proportions or dynamically adjusting these proportions during training to optimise pretraining performance. However, few methods addressed the complexities of domain-adaptive continual pre-training. To fill this gap, we propose Velocitune, a novel framework that dynamically assesses learning velocity and adjusts data proportions accordingly, favouring slower learning domains while de-emphasising faster learning ones, which is guided by a scaling law to estimate the desired learning goal for each domain with a less associated cost. To evaluate the effectiveness of Velocitune, we conduct experiments on a dataset focused on reasoning tasks with CodeLlama, as well as on a corpus of system commands using Llama3 and Mistral. Velocitune achieves performance gains in both math and code reasoning tasks and command-line generation benchmarks. Further analysis reveals that key factors driving Velocitune’s effectiveness include target estimation and data ordering.
%R 10.18653/v1/2025.acl-long.813
%U https://aclanthology.org/2025.acl-long.813/
%U https://doi.org/10.18653/v1/2025.acl-long.813
%P 16644-16656
Markdown (Informal)
[Velocitune: A Velocity-based Dynamic Domain Reweighting Method for Continual Pre-training](https://aclanthology.org/2025.acl-long.813/) (Luo et al., ACL 2025)
ACL