@inproceedings{liu-yu-2026-semtoken,
title = "{S}em{T}oken: Semantic-Aware Tokenization for Efficient Long-Context Language Models",
author = "Liu, Dong and
Yu, Yanxuan",
editor = "Mohammad, Saif M. and
Ousidhoum, Nedjma",
booktitle = "Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.starsem-conference.1/",
pages = "1--12",
ISBN = "979-8-89176-413-2",
abstract = "Long-context language models face efficiency challenges as context lengths expand. Traditional tokenization methods like BPE operate on frequency statistics, ignoring semantic structure and over-tokenizing redundant spans. We propose \textbf{SemToken}, a semantic-aware tokenization framework that adaptively compresses token sequences based on semantic density. SemToken uses lightweight encoders to identify and merge semantically equivalent spans, allocates variable granularity based on local semantic density, and dynamically adjusts token budgets during generation. Evaluations on WikiText-103, LongBench, and BookSum demonstrate 2.4$\times$ token reduction, 1.9$\times$ inference speedup, and 67{\%} memory reduction while preserving or improving model quality. SemToken integrates seamlessly with existing models and achieves multiplicative benefits when combined with FlashAttention (up to 2.7$\times$ total speedup)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-yu-2026-semtoken">
<titleInfo>
<title>SemToken: Semantic-Aware Tokenization for Efficient Long-Context Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxuan</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>Long-context language models face efficiency challenges as context lengths expand. Traditional tokenization methods like BPE operate on frequency statistics, ignoring semantic structure and over-tokenizing redundant spans. We propose SemToken, a semantic-aware tokenization framework that adaptively compresses token sequences based on semantic density. SemToken uses lightweight encoders to identify and merge semantically equivalent spans, allocates variable granularity based on local semantic density, and dynamically adjusts token budgets during generation. Evaluations on WikiText-103, LongBench, and BookSum demonstrate 2.4× token reduction, 1.9× inference speedup, and 67% memory reduction while preserving or improving model quality. SemToken integrates seamlessly with existing models and achieves multiplicative benefits when combined with FlashAttention (up to 2.7× total speedup).</abstract>
<identifier type="citekey">liu-yu-2026-semtoken</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.1/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>1</start>
<end>12</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SemToken: Semantic-Aware Tokenization for Efficient Long-Context Language Models
%A Liu, Dong
%A Yu, Yanxuan
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F liu-yu-2026-semtoken
%X Long-context language models face efficiency challenges as context lengths expand. Traditional tokenization methods like BPE operate on frequency statistics, ignoring semantic structure and over-tokenizing redundant spans. We propose SemToken, a semantic-aware tokenization framework that adaptively compresses token sequences based on semantic density. SemToken uses lightweight encoders to identify and merge semantically equivalent spans, allocates variable granularity based on local semantic density, and dynamically adjusts token budgets during generation. Evaluations on WikiText-103, LongBench, and BookSum demonstrate 2.4× token reduction, 1.9× inference speedup, and 67% memory reduction while preserving or improving model quality. SemToken integrates seamlessly with existing models and achieves multiplicative benefits when combined with FlashAttention (up to 2.7× total speedup).
%U https://aclanthology.org/2026.starsem-conference.1/
%P 1-12
Markdown (Informal)
[SemToken: Semantic-Aware Tokenization for Efficient Long-Context Language Models](https://aclanthology.org/2026.starsem-conference.1/) (Liu & Yu, *SEM 2026)
ACL