@article{zhang-etal-2025-active-knowledge,
title = "Active Knowledge Structuring for Large Language Models in Materials Science Text Mining",
author = "Zhang, Xin and
Yuan, Jingling and
Zhang, Peiliang and
Liu, Jia and
Li, Lin",
journal = "Transactions of the Association for Computational Linguistics",
volume = "13",
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.tacl-1.55/",
doi = "10.1162/tacl.a.36",
pages = "1186--1203",
abstract = "Large Language Models (LLMs) offer a promising alternative to traditional Materials Science Text Mining (MSTM) by reducing the need for extensive data labeling and fine-tuning. However, existing zero-/few-shot methods still face limitations in aligning with personalized needs in scientific discovery. To address this, we propose ClassMATe, an active knowledge structuring approach for MSTM. Specifically, we first propose a class definition stylization method to structure knowledge, enabling explicit clustering of latent material knowledge in LLMs for enhanced inference. To align with the scientists' needs, we propose an active needs refining strategy that iteratively clarifies needs by learning from uncertainty-aware hard samples of LLMs, further refining the knowledge structuring. Extensive experiments on seven tasks and eight datasets show that ClassMATe, as a plug-and-play method, achieves performance comparable to supervised learning without requiring fine-tuning or an extra knowledge base, highlighting the potential to bridge the gap between LLMs' latent knowledge and real-world scientific applications."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2025-active-knowledge">
<titleInfo>
<title>Active Knowledge Structuring for Large Language Models in Materials Science Text Mining</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jingling</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peiliang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jia</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) offer a promising alternative to traditional Materials Science Text Mining (MSTM) by reducing the need for extensive data labeling and fine-tuning. However, existing zero-/few-shot methods still face limitations in aligning with personalized needs in scientific discovery. To address this, we propose ClassMATe, an active knowledge structuring approach for MSTM. Specifically, we first propose a class definition stylization method to structure knowledge, enabling explicit clustering of latent material knowledge in LLMs for enhanced inference. To align with the scientists’ needs, we propose an active needs refining strategy that iteratively clarifies needs by learning from uncertainty-aware hard samples of LLMs, further refining the knowledge structuring. Extensive experiments on seven tasks and eight datasets show that ClassMATe, as a plug-and-play method, achieves performance comparable to supervised learning without requiring fine-tuning or an extra knowledge base, highlighting the potential to bridge the gap between LLMs’ latent knowledge and real-world scientific applications.</abstract>
<identifier type="citekey">zhang-etal-2025-active-knowledge</identifier>
<identifier type="doi">10.1162/tacl.a.36</identifier>
<location>
<url>https://aclanthology.org/2025.tacl-1.55/</url>
</location>
<part>
<date>2025</date>
<detail type="volume"><number>13</number></detail>
<extent unit="page">
<start>1186</start>
<end>1203</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Active Knowledge Structuring for Large Language Models in Materials Science Text Mining
%A Zhang, Xin
%A Yuan, Jingling
%A Zhang, Peiliang
%A Liu, Jia
%A Li, Lin
%J Transactions of the Association for Computational Linguistics
%D 2025
%V 13
%I MIT Press
%C Cambridge, MA
%F zhang-etal-2025-active-knowledge
%X Large Language Models (LLMs) offer a promising alternative to traditional Materials Science Text Mining (MSTM) by reducing the need for extensive data labeling and fine-tuning. However, existing zero-/few-shot methods still face limitations in aligning with personalized needs in scientific discovery. To address this, we propose ClassMATe, an active knowledge structuring approach for MSTM. Specifically, we first propose a class definition stylization method to structure knowledge, enabling explicit clustering of latent material knowledge in LLMs for enhanced inference. To align with the scientists’ needs, we propose an active needs refining strategy that iteratively clarifies needs by learning from uncertainty-aware hard samples of LLMs, further refining the knowledge structuring. Extensive experiments on seven tasks and eight datasets show that ClassMATe, as a plug-and-play method, achieves performance comparable to supervised learning without requiring fine-tuning or an extra knowledge base, highlighting the potential to bridge the gap between LLMs’ latent knowledge and real-world scientific applications.
%R 10.1162/tacl.a.36
%U https://aclanthology.org/2025.tacl-1.55/
%U https://doi.org/10.1162/tacl.a.36
%P 1186-1203
Markdown (Informal)
[Active Knowledge Structuring for Large Language Models in Materials Science Text Mining](https://aclanthology.org/2025.tacl-1.55/) (Zhang et al., TACL 2025)
ACL
Xin Zhang, Jingling Yuan, Peiliang Zhang, Jia Liu, and Lin Li. 2025. Active Knowledge Structuring for Large Language Models in Materials Science Text Mining. Transactions of the Association for Computational Linguistics, 13:1186–1203.