@inproceedings{aldarrab-may-2022-segmenting,
title = "Segmenting Numerical Substitution Ciphers",
author = "Aldarrab, Nada and
May, Jonathan",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.44",
doi = "10.18653/v1/2022.emnlp-main.44",
pages = "706--714",
abstract = "Deciphering historical substitution ciphers is a challenging problem. Example problems that have been previously studied include detecting cipher type, detecting plaintext language, and acquiring the substitution key for segmented ciphers. However, attacking unsegmented ciphers is still a challenging task. Segmentation (i.e. finding substitution units) is essential for cracking those ciphers. In this work, we propose the first automatic methods to segment those ciphers using Byte Pair Encoding (BPE) and unigram language models. Our methods achieve an average segmentation error of 2{\%} on 100 randomly-generated monoalphabetic ciphers and 27{\%} on 3 real historical homophonic ciphers. We also propose a method for solving non-deterministic ciphers with existing keys using a lattice and a pretrained language model. Our method leads to the full solution of the IA cipher; a real historical cipher that has not been fully solved until this work.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aldarrab-may-2022-segmenting">
<titleInfo>
<title>Segmenting Numerical Substitution Ciphers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nada</namePart>
<namePart type="family">Aldarrab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Deciphering historical substitution ciphers is a challenging problem. Example problems that have been previously studied include detecting cipher type, detecting plaintext language, and acquiring the substitution key for segmented ciphers. However, attacking unsegmented ciphers is still a challenging task. Segmentation (i.e. finding substitution units) is essential for cracking those ciphers. In this work, we propose the first automatic methods to segment those ciphers using Byte Pair Encoding (BPE) and unigram language models. Our methods achieve an average segmentation error of 2% on 100 randomly-generated monoalphabetic ciphers and 27% on 3 real historical homophonic ciphers. We also propose a method for solving non-deterministic ciphers with existing keys using a lattice and a pretrained language model. Our method leads to the full solution of the IA cipher; a real historical cipher that has not been fully solved until this work.</abstract>
<identifier type="citekey">aldarrab-may-2022-segmenting</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.44</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.44</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>706</start>
<end>714</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Segmenting Numerical Substitution Ciphers
%A Aldarrab, Nada
%A May, Jonathan
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F aldarrab-may-2022-segmenting
%X Deciphering historical substitution ciphers is a challenging problem. Example problems that have been previously studied include detecting cipher type, detecting plaintext language, and acquiring the substitution key for segmented ciphers. However, attacking unsegmented ciphers is still a challenging task. Segmentation (i.e. finding substitution units) is essential for cracking those ciphers. In this work, we propose the first automatic methods to segment those ciphers using Byte Pair Encoding (BPE) and unigram language models. Our methods achieve an average segmentation error of 2% on 100 randomly-generated monoalphabetic ciphers and 27% on 3 real historical homophonic ciphers. We also propose a method for solving non-deterministic ciphers with existing keys using a lattice and a pretrained language model. Our method leads to the full solution of the IA cipher; a real historical cipher that has not been fully solved until this work.
%R 10.18653/v1/2022.emnlp-main.44
%U https://aclanthology.org/2022.emnlp-main.44
%U https://doi.org/10.18653/v1/2022.emnlp-main.44
%P 706-714
Markdown (Informal)
[Segmenting Numerical Substitution Ciphers](https://aclanthology.org/2022.emnlp-main.44) (Aldarrab & May, EMNLP 2022)
ACL
- Nada Aldarrab and Jonathan May. 2022. Segmenting Numerical Substitution Ciphers. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 706–714, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.