@inproceedings{yano-miwa-2025-effect,
title = "Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability",
author = "Yano, Ken and
Miwa, Makoto",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Tsujii, Junichi",
booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing",
month = aug,
year = "2025",
address = "Viena, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bionlp-1.2/",
doi = "10.18653/v1/2025.bionlp-1.2",
pages = "18--26",
ISBN = "979-8-89176-275-6",
abstract = "Continual Pre-training (CPT) can help pre-trained large language models (LLMs) effectively adapt to new or under-trained domains or low-resource languages without re-training from scratch.Nevertheless, during CPT, the model{'}s few-shot transfer ability is known to be affected for emergent tasks.We verified this by comparing the performance between the few-shot and fine-tuning settings on the same tasks.We used Llama3-ELAINE-medLLM, which was continually pre-trained on Llama3-8B, targeted for the biomedical domain, and adapted for multilingual languages (English, Japanese, and Chinese).We compared the performance of Llama3-ELAINE-medLLM and Llama3-8B in three emergent tasks: two related domain tasks, entity recognition (NER) and machine translation (MT), and one out-of-domain task, summarization (SUM). Our experimental results show that degradation in few-shot transfer ability does not necessarily indicate the model{'}s underlying potential on the same task after fine-tuning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yano-miwa-2025-effect">
<titleInfo>
<title>Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ken</namePart>
<namePart type="family">Yano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Workshop on Biomedical Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-275-6</identifier>
</relatedItem>
<abstract>Continual Pre-training (CPT) can help pre-trained large language models (LLMs) effectively adapt to new or under-trained domains or low-resource languages without re-training from scratch. Nevertheless, during CPT, the model’s few-shot transfer ability is known to be affected for emergent tasks. We verified this by comparing the performance between the few-shot and fine-tuning settings on the same tasks. We used Llama3-ELAINE-medLLM, which was continually pre-trained on Llama3-8B, targeted for the biomedical domain, and adapted for multilingual languages (English, Japanese, and Chinese). We compared the performance of Llama3-ELAINE-medLLM and Llama3-8B in three emergent tasks: two related domain tasks, entity recognition (NER) and machine translation (MT), and one out-of-domain task, summarization (SUM). Our experimental results show that degradation in few-shot transfer ability does not necessarily indicate the model’s underlying potential on the same task after fine-tuning.</abstract>
<identifier type="citekey">yano-miwa-2025-effect</identifier>
<identifier type="doi">10.18653/v1/2025.bionlp-1.2</identifier>
<location>
<url>https://aclanthology.org/2025.bionlp-1.2/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>18</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability
%A Yano, Ken
%A Miwa, Makoto
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Tsujii, Junichi
%S Proceedings of the 24th Workshop on Biomedical Language Processing
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-275-6
%F yano-miwa-2025-effect
%X Continual Pre-training (CPT) can help pre-trained large language models (LLMs) effectively adapt to new or under-trained domains or low-resource languages without re-training from scratch. Nevertheless, during CPT, the model’s few-shot transfer ability is known to be affected for emergent tasks. We verified this by comparing the performance between the few-shot and fine-tuning settings on the same tasks. We used Llama3-ELAINE-medLLM, which was continually pre-trained on Llama3-8B, targeted for the biomedical domain, and adapted for multilingual languages (English, Japanese, and Chinese). We compared the performance of Llama3-ELAINE-medLLM and Llama3-8B in three emergent tasks: two related domain tasks, entity recognition (NER) and machine translation (MT), and one out-of-domain task, summarization (SUM). Our experimental results show that degradation in few-shot transfer ability does not necessarily indicate the model’s underlying potential on the same task after fine-tuning.
%R 10.18653/v1/2025.bionlp-1.2
%U https://aclanthology.org/2025.bionlp-1.2/
%U https://doi.org/10.18653/v1/2025.bionlp-1.2
%P 18-26
Markdown (Informal)
[Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability](https://aclanthology.org/2025.bionlp-1.2/) (Yano & Miwa, BioNLP 2025)
ACL
Ken Yano and Makoto Miwa. 2025. [Effect of Multilingual and Domain-adapted Continual Pre-training on Few-shot Promptability](https://aclanthology.org/2025.bionlp-1.2/). In *Proceedings of the 24th Workshop on Biomedical Language Processing*, pages 18–26, Vienna, Austria. Association for Computational Linguistics.