@inproceedings{pei-etal-2024-enhanced,
title = "Enhanced {B}io{T}5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble",
author = "Pei, Qizhi and
Wu, Lijun and
Gao, Kaiyuan and
Zhu, Jinhua and
Yan, Rui",
editor = "Edwards, Carl and
Wang, Qingyun and
Li, Manling and
Zhao, Lawrence and
Hope, Tom and
Ji, Heng",
booktitle = "Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.langmol-1.6",
doi = "10.18653/v1/2024.langmol-1.6",
pages = "48--54",
abstract = "This paper presents our enhanced BioT5+ method for the Language + Molecules shared task at the ACL 2024 Workshop. The task involves {``}translating{''} between molecules and natural language, including molecule captioning and text-based molecule generation using the \textit{L+M-24} dataset. Our method consists of three stages. In the first stage, we distill data from various models. In the second stage, combined with \textit{extra} version of the provided dataset, we train diverse models for subsequent voting ensemble.We also adopt Transductive Ensemble Learning (TEL) to enhance these base models. Lastly, all models are integrated using a voting ensemble method. Experimental results demonstrate that BioT5+ achieves superior performance on \textit{L+M-24} dataset. On the final leaderboard, our method (team name: \textbf{qizhipei}) ranks \textbf{first} in the text-based molecule generation task and \textbf{second} in the molecule captioning task, highlighting its efficacy and robustness in translating between molecules and natural language. The pre-trained BioT5+ models are available at \url{https://github.com/QizhiPei/BioT5}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pei-etal-2024-enhanced">
<titleInfo>
<title>Enhanced BioT5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qizhi</namePart>
<namePart type="family">Pei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lijun</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaiyuan</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinhua</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carl</namePart>
<namePart type="family">Edwards</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingyun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manling</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lawrence</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Hope</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents our enhanced BioT5+ method for the Language + Molecules shared task at the ACL 2024 Workshop. The task involves “translating” between molecules and natural language, including molecule captioning and text-based molecule generation using the L+M-24 dataset. Our method consists of three stages. In the first stage, we distill data from various models. In the second stage, combined with the extra version of the provided dataset, we train diverse models for the subsequent voting ensemble. We also adopt Transductive Ensemble Learning (TEL) to enhance these base models. Lastly, all models are integrated using a voting ensemble method. Experimental results demonstrate that BioT5+ achieves superior performance on the L+M-24 dataset. On the final leaderboard, our method (team name: qizhipei) ranks first in the text-based molecule generation task and second in the molecule captioning task, highlighting its efficacy and robustness in translating between molecules and natural language. The pre-trained BioT5+ models are available at https://github.com/QizhiPei/BioT5.</abstract>
<identifier type="citekey">pei-etal-2024-enhanced</identifier>
<identifier type="doi">10.18653/v1/2024.langmol-1.6</identifier>
<location>
<url>https://aclanthology.org/2024.langmol-1.6</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>48</start>
<end>54</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enhanced BioT5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble
%A Pei, Qizhi
%A Wu, Lijun
%A Gao, Kaiyuan
%A Zhu, Jinhua
%A Yan, Rui
%Y Edwards, Carl
%Y Wang, Qingyun
%Y Li, Manling
%Y Zhao, Lawrence
%Y Hope, Tom
%Y Ji, Heng
%S Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F pei-etal-2024-enhanced
%X This paper presents our enhanced BioT5+ method for the Language + Molecules shared task at the ACL 2024 Workshop. The task involves “translating” between molecules and natural language, including molecule captioning and text-based molecule generation using the L+M-24 dataset. Our method consists of three stages. In the first stage, we distill data from various models. In the second stage, combined with the extra version of the provided dataset, we train diverse models for the subsequent voting ensemble. We also adopt Transductive Ensemble Learning (TEL) to enhance these base models. Lastly, all models are integrated using a voting ensemble method. Experimental results demonstrate that BioT5+ achieves superior performance on the L+M-24 dataset. On the final leaderboard, our method (team name: qizhipei) ranks first in the text-based molecule generation task and second in the molecule captioning task, highlighting its efficacy and robustness in translating between molecules and natural language. The pre-trained BioT5+ models are available at https://github.com/QizhiPei/BioT5.
%R 10.18653/v1/2024.langmol-1.6
%U https://aclanthology.org/2024.langmol-1.6
%U https://doi.org/10.18653/v1/2024.langmol-1.6
%P 48-54
Markdown (Informal)
[Enhanced BioT5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble](https://aclanthology.org/2024.langmol-1.6) (Pei et al., LangMol-WS 2024)
ACL
Qizhi Pei, Lijun Wu, Kaiyuan Gao, Jinhua Zhu, and Rui Yan. 2024. Enhanced BioT5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble. In Proceedings of the 1st Workshop on Language + Molecules (L+M 2024), pages 48–54, Bangkok, Thailand. Association for Computational Linguistics.
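The abstract above describes a three-stage pipeline whose final stage is a voting ensemble over several base models. As a rough illustration only, the Python sketch below shows one common way to vote over generated sequences: pick the candidate most similar on average to all the others. The consensus-by-similarity rule, the vote function, and the SMILES example are illustrative assumptions, not the authors' published implementation.

from difflib import SequenceMatcher

def vote(candidates):
    """Hypothetical consensus vote: return the candidate with the highest
    average similarity to the other candidates. An illustrative sketch,
    not the implementation used in the paper."""
    if len(candidates) == 1:
        return candidates[0]

    def avg_similarity(i):
        # Mean pairwise similarity of candidate i against every other candidate.
        return sum(
            SequenceMatcher(None, candidates[i], candidates[j]).ratio()
            for j in range(len(candidates))
            if j != i
        ) / (len(candidates) - 1)

    return candidates[max(range(len(candidates)), key=avg_similarity)]

# Example: three base models propose SMILES strings for the same input text.
print(vote(["CCO", "CCO", "CCN"]))  # prints "CCO", the consensus candidate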