@inproceedings{kim-etal-2025-ixi,
title = "ixi-{GEN}: Efficient Industrial s{LLM}s through Domain Adaptive Continual Pretraining",
author = "Kim, Seonwu and
Na, Yohan and
Kim, Kihun and
Cho, Hanhee and
Lim, Geun and
Kim, Mintae and
Park, Seongik and
Kim, Ki Hyun and
Han, Youngsub and
Jeon, Byoung-Ki",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.165/",
pages = "2387--2404",
ISBN = "979-8-89176-333-3",
abstract = "The emergence of open-source large language models (LLMs) has expanded opportunities for enterprise applications; however, many organizations still lack the infrastructure to deploy and maintain large-scale models. As a result, small LLMs (sLLMs) have become a practical alternative despite inherent performance limitations. While Domain Adaptive Continual Pretraining (DACP) has been explored for domain adaptation, its utility in commercial settings remains under-examined. In this study, we validate the effectiveness of a DACP-based recipe across diverse foundation models and service domains, producing DACP-applied sLLMs (ixi-GEN). Through extensive experiments and real-world evaluations, we demonstrate that ixi-GEN models achieve substantial gains in target-domain performance while preserving general capabilities, offering a cost-efficient and scalable solution for enterprise-level deployment."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2025-ixi">
<titleInfo>
<title>ixi-GEN: Efficient Industrial sLLMs through Domain Adaptive Continual Pretraining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seonwu</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yohan</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kihun</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanhee</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geun</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mintae</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seongik</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ki</namePart>
<namePart type="given">Hyun</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youngsub</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byoung-Ki</namePart>
<namePart type="family">Jeon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>The emergence of open-source large language models (LLMs) has expanded opportunities for enterprise applications; however, many organizations still lack the infrastructure to deploy and maintain large-scale models. As a result, small LLMs (sLLMs) have become a practical alternative despite inherent performance limitations. While Domain Adaptive Continual Pretraining (DACP) has been explored for domain adaptation, its utility in commercial settings remains under-examined. In this study, we validate the effectiveness of a DACP-based recipe across diverse foundation models and service domains, producing DACP-applied sLLMs (ixi-GEN). Through extensive experiments and real-world evaluations, we demonstrate that ixi-GEN models achieve substantial gains in target-domain performance while preserving general capabilities, offering a cost-efficient and scalable solution for enterprise-level deployment.</abstract>
<identifier type="citekey">kim-etal-2025-ixi</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.165/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>2387</start>
<end>2404</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ixi-GEN: Efficient Industrial sLLMs through Domain Adaptive Continual Pretraining
%A Kim, Seonwu
%A Na, Yohan
%A Kim, Kihun
%A Cho, Hanhee
%A Lim, Geun
%A Kim, Mintae
%A Park, Seongik
%A Kim, Ki Hyun
%A Han, Youngsub
%A Jeon, Byoung-Ki
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F kim-etal-2025-ixi
%X The emergence of open-source large language models (LLMs) has expanded opportunities for enterprise applications; however, many organizations still lack the infrastructure to deploy and maintain large-scale models. As a result, small LLMs (sLLMs) have become a practical alternative despite inherent performance limitations. While Domain Adaptive Continual Pretraining (DACP) has been explored for domain adaptation, its utility in commercial settings remains under-examined. In this study, we validate the effectiveness of a DACP-based recipe across diverse foundation models and service domains, producing DACP-applied sLLMs (ixi-GEN). Through extensive experiments and real-world evaluations, we demonstrate that ixi-GEN models achieve substantial gains in target-domain performance while preserving general capabilities, offering a cost-efficient and scalable solution for enterprise-level deployment.
%U https://aclanthology.org/2025.emnlp-industry.165/
%P 2387-2404
Markdown (Informal)
[ixi-GEN: Efficient Industrial sLLMs through Domain Adaptive Continual Pretraining](https://aclanthology.org/2025.emnlp-industry.165/) (Kim et al., EMNLP 2025)
ACL
- Seonwu Kim, Yohan Na, Kihun Kim, Hanhee Cho, Geun Lim, Mintae Kim, Seongik Park, Ki Hyun Kim, Youngsub Han, and Byoung-Ki Jeon. 2025. ixi-GEN: Efficient Industrial sLLMs through Domain Adaptive Continual Pretraining. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 2387–2404, Suzhou (China). Association for Computational Linguistics.