@inproceedings{park-etal-2024-moltres,
title = "{M}ol{TRES}: Improving Chemical Language Representation Learning for Molecular Property Prediction",
author = "Park, Jun-Hyung and
Kim, Yeachan and
Lee, Mingyu and
Park, Hyuntae and
Lee, SangKeun",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.788/",
doi = "10.18653/v1/2024.emnlp-main.788",
pages = "14241--14254",
abstract = "Chemical representation learning has gained increasing interest due to the limited availability of supervised data in fields such as drug and materials design. This interest particularly extends to chemical language representation learning, which involves pre-training Transformers on SMILES sequences - textual descriptors of molecules. Despite its success in molecular property prediction, current practices often lead to overfitting and limited scalability due to early convergence. In this paper, we introduce a novel chemical language representation learning framework, called MolTRES, to address these issues. MolTRES incorporates generator-discriminator training, allowing the model to learn from more challenging examples that require structural understanding. In addition, we enrich molecular representations by transferring knowledge from scientific literature by integrating external materials embedding. Experimental results show that our model outperforms existing state-of-the-art models on popular molecular property prediction tasks."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="park-etal-2024-moltres">
<titleInfo>
<title>MolTRES: Improving Chemical Language Representation Learning for Molecular Property Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jun-Hyung</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yeachan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyu</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyuntae</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SangKeun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Chemical representation learning has gained increasing interest due to the limited availability of supervised data in fields such as drug and materials design. This interest particularly extends to chemical language representation learning, which involves pre-training Transformers on SMILES sequences - textual descriptors of molecules. Despite its success in molecular property prediction, current practices often lead to overfitting and limited scalability due to early convergence. In this paper, we introduce a novel chemical language representation learning framework, called MolTRES, to address these issues. MolTRES incorporates generator-discriminator training, allowing the model to learn from more challenging examples that require structural understanding. In addition, we enrich molecular representations by transferring knowledge from scientific literature by integrating external materials embedding. Experimental results show that our model outperforms existing state-of-the-art models on popular molecular property prediction tasks.</abstract>
<identifier type="citekey">park-etal-2024-moltres</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.788</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.788/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>14241</start>
<end>14254</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MolTRES: Improving Chemical Language Representation Learning for Molecular Property Prediction
%A Park, Jun-Hyung
%A Kim, Yeachan
%A Lee, Mingyu
%A Park, Hyuntae
%A Lee, SangKeun
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F park-etal-2024-moltres
%X Chemical representation learning has gained increasing interest due to the limited availability of supervised data in fields such as drug and materials design. This interest particularly extends to chemical language representation learning, which involves pre-training Transformers on SMILES sequences - textual descriptors of molecules. Despite its success in molecular property prediction, current practices often lead to overfitting and limited scalability due to early convergence. In this paper, we introduce a novel chemical language representation learning framework, called MolTRES, to address these issues. MolTRES incorporates generator-discriminator training, allowing the model to learn from more challenging examples that require structural understanding. In addition, we enrich molecular representations by transferring knowledge from scientific literature by integrating external materials embedding. Experimental results show that our model outperforms existing state-of-the-art models on popular molecular property prediction tasks.
%R 10.18653/v1/2024.emnlp-main.788
%U https://aclanthology.org/2024.emnlp-main.788/
%U https://doi.org/10.18653/v1/2024.emnlp-main.788
%P 14241-14254
Markdown (Informal)
[MolTRES: Improving Chemical Language Representation Learning for Molecular Property Prediction](https://aclanthology.org/2024.emnlp-main.788/) (Park et al., EMNLP 2024)
ACL