@inproceedings{kim-etal-2024-seed,
title = "{SEED}: Semantic Knowledge Transfer for Language Model Adaptation to Materials Science",
author = "Kim, Yeachan and
Park, Jun-Hyung and
Kim, SungHo and
Park, Juhyeong and
Kim, Sangyun and
Lee, SangKeun",
editor = "Dernoncourt, Franck and
Preo{\c{t}}iuc-Pietro, Daniel and
Shimorina, Anastasia",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2024",
address = "Miami, Florida, US",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-industry.31",
pages = "421--428",
abstract = "Materials science is an interdisciplinary field focused on studying and discovering materials around us. However, due to the vast space of materials, datasets in this field are typically scarce and have limited coverage. This inherent limitation makes current adaptation methods less effective when adapting pre-trained language models (PLMs) to materials science, as these methods rely heavily on the frequency information from limited downstream datasets. In this paper, we propose Semantic Knowledge Transfer (SEED), a novel vocabulary expansion method to adapt the pre-trained language models for materials science. The core strategy of SEED is to transfer the materials knowledge of lightweight embeddings into the PLMs. To this end, we introduce knowledge bridge networks, which learn to transfer the latent knowledge of the materials embeddings into ones compatible with PLMs. By expanding the embedding layer of PLMs with these transformed embeddings, PLMs can comprehensively understand the complex terminology associated with materials science. We conduct extensive experiments across a broad range of materials-related benchmarks. Comprehensive evaluation results convincingly demonstrate that SEED mitigates the mentioned limitations of previous adaptation methods, showcasing the efficacy of transferring embedding knowledge into PLMs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2024-seed">
<titleInfo>
<title>SEED: Semantic Knowledge Transfer for Language Model Adaptation to Materials Science</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yeachan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun-Hyung</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SungHo</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juhyeong</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sangyun</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SangKeun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franck</namePart>
<namePart type="family">Dernoncourt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoţiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, US</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Materials science is an interdisciplinary field focused on studying and discovering materials around us. However, due to the vast space of materials, datasets in this field are typically scarce and have limited coverage. This inherent limitation makes current adaptation methods less effective when adapting pre-trained language models (PLMs) to materials science, as these methods rely heavily on the frequency information from limited downstream datasets. In this paper, we propose Semantic Knowledge Transfer (SEED), a novel vocabulary expansion method to adapt the pre-trained language models for materials science. The core strategy of SEED is to transfer the materials knowledge of lightweight embeddings into the PLMs. To this end, we introduce knowledge bridge networks, which learn to transfer the latent knowledge of the materials embeddings into ones compatible with PLMs. By expanding the embedding layer of PLMs with these transformed embeddings, PLMs can comprehensively understand the complex terminology associated with materials science. We conduct extensive experiments across a broad range of materials-related benchmarks. Comprehensive evaluation results convincingly demonstrate that SEED mitigates the mentioned limitations of previous adaptation methods, showcasing the efficacy of transferring embedding knowledge into PLMs.</abstract>
<identifier type="citekey">kim-etal-2024-seed</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-industry.31</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>421</start>
<end>428</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SEED: Semantic Knowledge Transfer for Language Model Adaptation to Materials Science
%A Kim, Yeachan
%A Park, Jun-Hyung
%A Kim, SungHo
%A Park, Juhyeong
%A Kim, Sangyun
%A Lee, SangKeun
%Y Dernoncourt, Franck
%Y Preoţiuc-Pietro, Daniel
%Y Shimorina, Anastasia
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, US
%F kim-etal-2024-seed
%X Materials science is an interdisciplinary field focused on studying and discovering materials around us. However, due to the vast space of materials, datasets in this field are typically scarce and have limited coverage. This inherent limitation makes current adaptation methods less effective when adapting pre-trained language models (PLMs) to materials science, as these methods rely heavily on the frequency information from limited downstream datasets. In this paper, we propose Semantic Knowledge Transfer (SEED), a novel vocabulary expansion method to adapt the pre-trained language models for materials science. The core strategy of SEED is to transfer the materials knowledge of lightweight embeddings into the PLMs. To this end, we introduce knowledge bridge networks, which learn to transfer the latent knowledge of the materials embeddings into ones compatible with PLMs. By expanding the embedding layer of PLMs with these transformed embeddings, PLMs can comprehensively understand the complex terminology associated with materials science. We conduct extensive experiments across a broad range of materials-related benchmarks. Comprehensive evaluation results convincingly demonstrate that SEED mitigates the mentioned limitations of previous adaptation methods, showcasing the efficacy of transferring embedding knowledge into PLMs.
%U https://aclanthology.org/2024.emnlp-industry.31
%P 421-428
Markdown (Informal)
[SEED: Semantic Knowledge Transfer for Language Model Adaptation to Materials Science](https://aclanthology.org/2024.emnlp-industry.31) (Kim et al., EMNLP 2024)
ACL