@inproceedings{syvokon-2026-dictionary,
title = "Dictionary-Based Speculative Decoding for Non-{L}atin-Script Languages",
author = "Syvokon, Oleksiy",
editor = "Romanyshyn, Mariana",
booktitle = "Proceedings of the Fifth {U}krainian Natural Language Processing Conference ({UNLP} 2026)",
month = may,
year = "2026",
address = "Lviv, Ukraine",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.unlp-1.15/",
pages = "169--183",
ISBN = "979-8-89176-359-3",
abstract = "Large language models tokenize non-Latin-script languages inefficiently: a single word in Ukrainian or Crimean Tatar is split into two to three times as many tokens as its English equivalent. We propose {\_}dictionary-based speculative decoding{\_} (DictSpec), which accelerates inference by proposing draft continuations from a static n-gram lookup table built offline from an unlabeled corpus. The lookup table requires no trainable parameters or GPU resources, is inexpensive to construct, adds under 5 MB of memory overhead, and can be reused across models that share a tokenizer. We evaluate DictSpec on Ukrainian and Crimean Tatar (Cyrillic and Latin scripts), implementing a vLLM plugin to benchmark five models ranging from 3B to 70B parameters on consumer- and server-grade GPUs. In controlled emulation, DictSpec reduces verification steps by up to 1.65{\texttimes}, with gains correlating substantially with tokenizer fertility. In live vLLM serving, pure DictSpec gives modest speedups, while a hybrid with prompt-local n-gram speculation reaches up to 1.76{\texttimes}. We release our code and vLLM plugin as open source."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="syvokon-2026-dictionary">
<titleInfo>
<title>Dictionary-Based Speculative Decoding for Non-Latin-Script Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oleksiy</namePart>
<namePart type="family">Syvokon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Lviv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-359-3</identifier>
</relatedItem>
<abstract>Large language models tokenize non-Latin-script languages inefficiently: a single word in Ukrainian or Crimean Tatar is split into two to three times as many tokens as its English equivalent. We propose _dictionary-based speculative decoding_ (DictSpec), which accelerates inference by proposing draft continuations from a static n-gram lookup table built offline from an unlabeled corpus. The lookup table requires no trainable parameters or GPU resources, is inexpensive to construct, adds under 5 MB of memory overhead, and can be reused across models that share a tokenizer. We evaluate DictSpec on Ukrainian and Crimean Tatar (Cyrillic and Latin scripts), implementing a vLLM plugin to benchmark five models ranging from 3B to 70B parameters on consumer- and server-grade GPUs. In controlled emulation, DictSpec reduces verification steps by up to 1.65×, with gains correlating substantially with tokenizer fertility. In live vLLM serving, pure DictSpec gives modest speedups, while a hybrid with prompt-local n-gram speculation reaches up to 1.76×. We release our code and vLLM plugin as open source.</abstract>
<identifier type="citekey">syvokon-2026-dictionary</identifier>
<location>
<url>https://aclanthology.org/2026.unlp-1.15/</url>
</location>
<part>
<date>2026-05</date>
<extent unit="page">
<start>169</start>
<end>183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dictionary-Based Speculative Decoding for Non-Latin-Script Languages
%A Syvokon, Oleksiy
%Y Romanyshyn, Mariana
%S Proceedings of the Fifth Ukrainian Natural Language Processing Conference (UNLP 2026)
%D 2026
%8 May
%I Association for Computational Linguistics
%C Lviv, Ukraine
%@ 979-8-89176-359-3
%F syvokon-2026-dictionary
%X Large language models tokenize non-Latin-script languages inefficiently: a single word in Ukrainian or Crimean Tatar is split into two to three times as many tokens as its English equivalent. We propose _dictionary-based speculative decoding_ (DictSpec), which accelerates inference by proposing draft continuations from a static n-gram lookup table built offline from an unlabeled corpus. The lookup table requires no trainable parameters or GPU resources, is inexpensive to construct, adds under 5 MB of memory overhead, and can be reused across models that share a tokenizer. We evaluate DictSpec on Ukrainian and Crimean Tatar (Cyrillic and Latin scripts), implementing a vLLM plugin to benchmark five models ranging from 3B to 70B parameters on consumer- and server-grade GPUs. In controlled emulation, DictSpec reduces verification steps by up to 1.65×, with gains correlating substantially with tokenizer fertility. In live vLLM serving, pure DictSpec gives modest speedups, while a hybrid with prompt-local n-gram speculation reaches up to 1.76×. We release our code and vLLM plugin as open source.
%U https://aclanthology.org/2026.unlp-1.15/
%P 169-183
Markdown (Informal)
[Dictionary-Based Speculative Decoding for Non-Latin-Script Languages](https://aclanthology.org/2026.unlp-1.15/) (Syvokon, UNLP 2026)
ACL