% Conference paper: Findings of ACL 2025, pages 21051--21067.
% NOTE(review): `address` looks like the conference location rather than the
% publisher's city -- confirm before using it as a publisher location.
@inproceedings{ji-etal-2025-unlocking,
  title     = {Unlocking {LLMs}' Self-Improvement Capacity with Autonomous Learning for Domain Adaptation},
  author    = {Ji, Ke and
               Chen, Junying and
               Gao, Anningzhe and
               Xie, Wenya and
               Wan, Xiang and
               Wang, Benyou},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.1084/},
  doi       = {10.18653/v1/2025.findings-acl.1084},
  pages     = {21051--21067},
  isbn      = {979-8-89176-256-5},
  abstract  = {Self-supervised pre-training and instruction fine-tuning demonstrate the potential of large language models (LLMs) for domain adaptation (DA). In pursuit of superhuman performance, LLMs have demonstrated significant potential in math and coding through self-improvement algorithms that rely on iterative training with self-generated data. This success stems from the clear reward signals in these environments, which provide a solid foundation for self-improvement. However, when it comes to general DA scenarios, two main challenges emerge: 1) ambiguous self-improvement reward signals and 2) lack of high-quality instruction fine-tuning datasets. This motivates this paper addresses how LLMs can adapt autonomously to new domains using only a large amount of unlabeled target corpora. Inspired by the human practice of self-reflection through open- and closed-book exercises to achieve domain generalization, we propose autonomous learning, which creates a self-improvement learning environment for DA. Here, the model generates questions from documents and conducts two explorations{---}one with the original document and one with a masked version. By comparing these explorations, the LLMs can independently identify and enhance its policy for reducing knowledge gaps. Experiments across various DA tasks demonstrate that autonomous learning enhances the DA performance of existing models, outperforming traditional fine-tuning and self-improvement methods. Our code is publicly available at https://github.com/FreedomIntelligence/AL.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper with citekey ji-etal-2025-unlocking. -->
  <mods ID="ji-etal-2025-unlocking">
    <titleInfo>
      <title>Unlocking LLMs’ Self-Improvement Capacity with Autonomous Learning for Domain Adaptation</title>
    </titleInfo>

    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Ke</namePart>
      <namePart type="family">Ji</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junying</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anningzhe</namePart>
      <namePart type="family">Gao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wenya</namePart>
      <namePart type="family">Xie</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiang</namePart>
      <namePart type="family">Wan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Benyou</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>

    <abstract>Self-supervised pre-training and instruction fine-tuning demonstrate the potential of large language models (LLMs) for domain adaptation (DA). In pursuit of superhuman performance, LLMs have demonstrated significant potential in math and coding through self-improvement algorithms that rely on iterative training with self-generated data. This success stems from the clear reward signals in these environments, which provide a solid foundation for self-improvement. However, when it comes to general DA scenarios, two main challenges emerge: 1) ambiguous self-improvement reward signals and 2) lack of high-quality instruction fine-tuning datasets. This motivates this paper addresses how LLMs can adapt autonomously to new domains using only a large amount of unlabeled target corpora. Inspired by the human practice of self-reflection through open- and closed-book exercises to achieve domain generalization, we propose autonomous learning, which creates a self-improvement learning environment for DA. Here, the model generates questions from documents and conducts two explorations—one with the original document and one with a masked version. By comparing these explorations, the LLMs can independently identify and enhance its policy for reducing knowledge gaps. Experiments across various DA tasks demonstrate that autonomous learning enhances the DA performance of existing models, outperforming traditional fine-tuning and self-improvement methods. Our code is publicly available at https://github.com/FreedomIntelligence/AL.</abstract>

    <!-- Identifiers and landing page. -->
    <identifier type="citekey">ji-etal-2025-unlocking</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.1084</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.1084/</url>
    </location>

    <!-- Position within the host volume. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>21051</start>
        <end>21067</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Unlocking LLMs’ Self-Improvement Capacity with Autonomous Learning for Domain Adaptation
%A Ji, Ke
%A Chen, Junying
%A Gao, Anningzhe
%A Xie, Wenya
%A Wan, Xiang
%A Wang, Benyou
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F ji-etal-2025-unlocking
%X Self-supervised pre-training and instruction fine-tuning demonstrate the potential of large language models (LLMs) for domain adaptation (DA). In pursuit of superhuman performance, LLMs have demonstrated significant potential in math and coding through self-improvement algorithms that rely on iterative training with self-generated data. This success stems from the clear reward signals in these environments, which provide a solid foundation for self-improvement. However, when it comes to general DA scenarios, two main challenges emerge: 1) ambiguous self-improvement reward signals and 2) lack of high-quality instruction fine-tuning datasets. This motivates this paper addresses how LLMs can adapt autonomously to new domains using only a large amount of unlabeled target corpora. Inspired by the human practice of self-reflection through open- and closed-book exercises to achieve domain generalization, we propose autonomous learning, which creates a self-improvement learning environment for DA. Here, the model generates questions from documents and conducts two explorations—one with the original document and one with a masked version. By comparing these explorations, the LLMs can independently identify and enhance its policy for reducing knowledge gaps. Experiments across various DA tasks demonstrate that autonomous learning enhances the DA performance of existing models, outperforming traditional fine-tuning and self-improvement methods. Our code is publicly available at https://github.com/FreedomIntelligence/AL.
%R 10.18653/v1/2025.findings-acl.1084
%U https://aclanthology.org/2025.findings-acl.1084/
%U https://doi.org/10.18653/v1/2025.findings-acl.1084
%P 21051-21067
Markdown (Informal)
[Unlocking LLMs’ Self-Improvement Capacity with Autonomous Learning for Domain Adaptation](https://aclanthology.org/2025.findings-acl.1084/) (Ji et al., Findings 2025)
ACL