@inproceedings{edwards-etal-2024-l,
title = "{L}+{M}-24: Building a Dataset for {L}anguage+{M}olecules @ {ACL} 2024",
author = "Edwards, Carl and
Wang, Qingyun and
Zhao, Lawrence and
Ji, Heng",
editor = "Edwards, Carl and
Wang, Qingyun and
Li, Manling and
Zhao, Lawrence and
Hope, Tom and
Ji, Heng",
booktitle = "Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.langmol-1.1",
doi = "10.18653/v1/2024.langmol-1.1",
pages = "1--9",
abstract = "Language-molecule models have emerged as an exciting direction for molecular discovery and understanding. However, training these models is challenging due to the scarcity of molecule-language pair datasets. At this point, datasets have been released which are 1) small and scraped from existing databases, 2) large but noisy and constructed by performing entity linking on the scientific literature, and 3) built by converting property prediction datasets to natural language using templates. In this document, we detail the L+M-24 dataset, which has been created for the Language + Molecules Workshop shared task at ACL 2024. In particular, L+M-24 is designed to focus on three key benefits of natural language in molecule design: compositionality, functionality, and abstraction",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="edwards-etal-2024-l">
<titleInfo>
<title>L+M-24: Building a Dataset for Language+Molecules @ ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carl</namePart>
<namePart type="family">Edwards</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lawrence</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carl</namePart>
<namePart type="family">Edwards</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lawrence</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language-molecule models have emerged as an exciting direction for molecular discovery and understanding. However, training these models is challenging due to the scarcity of molecule-language pair datasets. At this point, datasets have been released which are 1) small and scraped from existing databases, 2) large but noisy and constructed by performing entity linking on the scientific literature, and 3) built by converting property prediction datasets to natural language using templates. In this document, we detail the L+M-24 dataset, which has been created for the Language + Molecules Workshop shared task at ACL 2024. In particular, L+M-24 is designed to focus on three key benefits of natural language in molecule design: compositionality, functionality, and abstraction</abstract>
<identifier type="citekey">edwards-etal-2024-l</identifier>
<identifier type="doi">10.18653/v1/2024.langmol-1.1</identifier>
<location>
<url>https://aclanthology.org/2024.langmol-1.1</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T L+M-24: Building a Dataset for Language+Molecules @ ACL 2024
%A Edwards, Carl
%A Wang, Qingyun
%A Zhao, Lawrence
%A Ji, Heng
%Y Edwards, Carl
%Y Wang, Qingyun
%Y Li, Manling
%Y Zhao, Lawrence
%Y Hope, Tom
%Y Ji, Heng
%S Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F edwards-etal-2024-l
%X Language-molecule models have emerged as an exciting direction for molecular discovery and understanding. However, training these models is challenging due to the scarcity of molecule-language pair datasets. At this point, datasets have been released which are 1) small and scraped from existing databases, 2) large but noisy and constructed by performing entity linking on the scientific literature, and 3) built by converting property prediction datasets to natural language using templates. In this document, we detail the L+M-24 dataset, which has been created for the Language + Molecules Workshop shared task at ACL 2024. In particular, L+M-24 is designed to focus on three key benefits of natural language in molecule design: compositionality, functionality, and abstraction
%R 10.18653/v1/2024.langmol-1.1
%U https://aclanthology.org/2024.langmol-1.1
%U https://doi.org/10.18653/v1/2024.langmol-1.1
%P 1-9
Markdown (Informal)
[L+M-24: Building a Dataset for Language+Molecules @ ACL 2024](https://aclanthology.org/2024.langmol-1.1) (Edwards et al., LangMol-WS 2024)
ACL