@inproceedings{wang-etal-2025-crystalicl,
title = "{C}rystal{ICL}: Enabling In-Context Learning for Crystal Generation",
author = "Wang, Ruobing and
Tan, Qiaoyu and
Wang, Yili and
Wang, Ying and
Wang, Xin",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.929/",
pages = "18440--18455",
ISBN = "979-8-89176-332-6",
abstract = "Designing crystal materials with desired physicochemical properties remains a fundamental challenge in materials science. While large language models (LLMs) have demonstrated strong in-context learning (ICL) capabilities, existing LLM-based crystal generation approaches are limited to zero-shot scenarios and are unable to benefit from few-shot scenarios. In contrast, human experts typically design new materials by modifying relevant known structures which aligns closely with the few-shot ICL paradigm. Motivated by this, we propose CrystalICL, a novel model designed for few-shot crystal generation. Specifically, we introduce a space-group based crystal tokenization method, which effectively reduces the complexity of modeling crystal symmetry in LLMs. We further introduce a condition-structure aware hybrid instruction tuning framework and a multi-task instruction tuning strategy, enabling the model to better exploit ICL by capturing structure-property relationships from limited data. Extensive experiments on four crystal generation benchmarks demonstrate the superiority of CrystalICL over the leading baseline methods on conditional and unconditional generation tasks."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2025-crystalicl">
<titleInfo>
<title>CrystalICL: Enabling In-Context Learning for Crystal Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruobing</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiaoyu</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yili</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ying</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Designing crystal materials with desired physicochemical properties remains a fundamental challenge in materials science. While large language models (LLMs) have demonstrated strong in-context learning (ICL) capabilities, existing LLM-based crystal generation approaches are limited to zero-shot scenarios and are unable to benefit from few-shot scenarios. In contrast, human experts typically design new materials by modifying relevant known structures which aligns closely with the few-shot ICL paradigm. Motivated by this, we propose CrystalICL, a novel model designed for few-shot crystal generation. Specifically, we introduce a space-group based crystal tokenization method, which effectively reduces the complexity of modeling crystal symmetry in LLMs. We further introduce a condition-structure aware hybrid instruction tuning framework and a multi-task instruction tuning strategy, enabling the model to better exploit ICL by capturing structure-property relationships from limited data. Extensive experiments on four crystal generation benchmarks demonstrate the superiority of CrystalICL over the leading baseline methods on conditional and unconditional generation tasks.</abstract>
<identifier type="citekey">wang-etal-2025-crystalicl</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.929/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>18440</start>
<end>18455</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CrystalICL: Enabling In-Context Learning for Crystal Generation
%A Wang, Ruobing
%A Tan, Qiaoyu
%A Wang, Yili
%A Wang, Ying
%A Wang, Xin
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F wang-etal-2025-crystalicl
%X Designing crystal materials with desired physicochemical properties remains a fundamental challenge in materials science. While large language models (LLMs) have demonstrated strong in-context learning (ICL) capabilities, existing LLM-based crystal generation approaches are limited to zero-shot scenarios and are unable to benefit from few-shot scenarios. In contrast, human experts typically design new materials by modifying relevant known structures which aligns closely with the few-shot ICL paradigm. Motivated by this, we propose CrystalICL, a novel model designed for few-shot crystal generation. Specifically, we introduce a space-group based crystal tokenization method, which effectively reduces the complexity of modeling crystal symmetry in LLMs. We further introduce a condition-structure aware hybrid instruction tuning framework and a multi-task instruction tuning strategy, enabling the model to better exploit ICL by capturing structure-property relationships from limited data. Extensive experiments on four crystal generation benchmarks demonstrate the superiority of CrystalICL over the leading baseline methods on conditional and unconditional generation tasks.
%U https://aclanthology.org/2025.emnlp-main.929/
%P 18440-18455
Markdown (Informal)
[CrystalICL: Enabling In-Context Learning for Crystal Generation](https://aclanthology.org/2025.emnlp-main.929/) (Wang et al., EMNLP 2025)
ACL