@comment{ACL Anthology record 2026.bionlp-1.56: a paper in the BioNLP 2026 proceedings. The address field holds the conference location.}
@inproceedings{nam-etal-2026-fact,
    title     = {{FACT}: Functional Group Alignment and Consistency in Token Space for Structure-aware Molecular Representation Learning},
    author    = {Nam, Hyeonyeong and
                 Choi, Woojae and
                 Lee, Deok-Joong and
                 Son, Young-Han and
                 Lee, Sangwoon and
                 Kang, Bogyeong and
                 Jo, Eunjung and
                 Kam, Tae-Eui},
    editor    = {Demner-Fushman, Dina and
                 Ananiadou, Sophia and
                 Roberts, Kirk and
                 Tsujii, Junichi},
    booktitle = {{BioNLP} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.bionlp-1.56/},
    pages     = {695--703},
    isbn      = {979-8-89176-434-7},
    abstract  = {Molecular representation learning aims to capture chemically meaningful structures for various downstream tasks such as accurate molecular property prediction. However, incorporating functional group (FG) information into SMILES-based models remains challenging. The absence of explicit alignment between graph-defined FG atom sets and tokens in sequence prevents complete substructure masking, while multiple valid SMILES forms of the same molecule lead to inconsistent FG representations in token space. To address these challenges, we propose FACT (Functional Group Alignment and Consistency in Token Space), an end-to-end framework for structure-aware SMILES-based representation learning. FACT introduces an atom--token alignment module for complete FG span masking during pre-training and enforces FG consistency across different SMILES forms during fine-tuning. Experiments on MoleculeNet benchmarks show that FACT achieves state-of-the-art or competitive performance on eight tasks, demonstrating the effectiveness of alignment and consistency learning for molecular representation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey nam-etal-2026-fact: eight authors, hosted in "BioNLP 2026" (four editors), pages 695 to 703. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="nam-etal-2026-fact">
    <titleInfo>
      <title>FACT: Functional Group Alignment and Consistency in Token Space for Structure-aware Molecular Representation Learning</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Hyeonyeong</namePart>
      <namePart type="family">Nam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Woojae</namePart>
      <namePart type="family">Choi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deok-Joong</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Young-Han</namePart>
      <namePart type="family">Son</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sangwoon</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bogyeong</namePart>
      <namePart type="family">Kang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eunjung</namePart>
      <namePart type="family">Jo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tae-Eui</namePart>
      <namePart type="family">Kam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>BioNLP 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Dina</namePart>
        <namePart type="family">Demner-Fushman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sophia</namePart>
        <namePart type="family">Ananiadou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kirk</namePart>
        <namePart type="family">Roberts</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Junichi</namePart>
        <namePart type="family">Tsujii</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-434-7</identifier>
    </relatedItem>
    <abstract>Molecular representation learning aims to capture chemically meaningful structures for various downstream tasks such as accurate molecular property prediction. However, incorporating functional group (FG) information into SMILES-based models remains challenging. The absence of explicit alignment between graph-defined FG atom sets and tokens in sequence prevents complete substructure masking, while multiple valid SMILES forms of the same molecule lead to inconsistent FG representations in token space. To address these challenges, we propose FACT (Functional Group Alignment and Consistency in Token Space), an end-to-end framework for structure-aware SMILES-based representation learning. FACT introduces an atom–token alignment module for complete FG span masking during pre-training and enforces FG consistency across different SMILES forms during fine-tuning. Experiments on MoleculeNet benchmarks show that FACT achieves state-of-the-art or competitive performance on eight tasks, demonstrating the effectiveness of alignment and consistency learning for molecular representation.</abstract>
    <identifier type="citekey">nam-etal-2026-fact</identifier>
    <location>
      <url>https://aclanthology.org/2026.bionlp-1.56/</url>
    </location>
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>695</start>
        <end>703</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T FACT: Functional Group Alignment and Consistency in Token Space for Structure-aware Molecular Representation Learning
%A Nam, Hyeonyeong
%A Choi, Woojae
%A Lee, Deok-Joong
%A Son, Young-Han
%A Lee, Sangwoon
%A Kang, Bogyeong
%A Jo, Eunjung
%A Kam, Tae-Eui
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F nam-etal-2026-fact
%X Molecular representation learning aims to capture chemically meaningful structures for various downstream tasks such as accurate molecular property prediction. However, incorporating functional group (FG) information into SMILES-based models remains challenging. The absence of explicit alignment between graph-defined FG atom sets and tokens in sequence prevents complete substructure masking, while multiple valid SMILES forms of the same molecule lead to inconsistent FG representations in token space. To address these challenges, we propose FACT (Functional Group Alignment and Consistency in Token Space), an end-to-end framework for structure-aware SMILES-based representation learning. FACT introduces an atom–token alignment module for complete FG span masking during pre-training and enforces FG consistency across different SMILES forms during fine-tuning. Experiments on MoleculeNet benchmarks show that FACT achieves state-of-the-art or competitive performance on eight tasks, demonstrating the effectiveness of alignment and consistency learning for molecular representation.
%U https://aclanthology.org/2026.bionlp-1.56/
%P 695-703
Markdown (Informal)
[FACT: Functional Group Alignment and Consistency in Token Space for Structure-aware Molecular Representation Learning](https://aclanthology.org/2026.bionlp-1.56/) (Nam et al., BioNLP 2026)
ACL
- Hyeonyeong Nam, Woojae Choi, Deok-Joong Lee, Young-Han Son, Sangwoon Lee, Bogyeong Kang, Eunjung Jo, and Tae-Eui Kam. 2026. FACT: Functional Group Alignment and Consistency in Token Space for Structure-aware Molecular Representation Learning. In BioNLP 2026, pages 695–703, San Diego, California. Association for Computational Linguistics.