@inproceedings{fono-etal-2024-embible,
title = "Embible: Reconstruction of {A}ncient {H}ebrew and {A}ramaic Texts Using Transformers",
author = "Fono, Niv and
Moshayof, Harel and
Karol, Eldar and
Assraf, Itai and
Last, Mark",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-eacl.56",
pages = "846--852",
abstract = "Hebrew and Aramaic inscriptions serve as an essential source of information on the ancient history of the Near East. Unfortunately, some parts of the inscribed texts become illegible over time. Special experts, called epigraphists, use time-consuming manual procedures to estimate the missing content. This problem can be considered an extended masked language modeling task, where the damaged content can comprise single characters, character n-grams (partial words), single complete words, and multi-word n-grams.This study is the first attempt to apply the masked language modeling approach to corrupted inscriptions in Hebrew and Aramaic languages, both using the Hebrew alphabet consisting mostly of consonant symbols. In our experiments, we evaluate several transformer-based models, which are fine-tuned on the Biblical texts and tested on three different percentages of randomly masked parts in the testing corpus. For any masking percentage, the highest text completion accuracy is obtained with a novel ensemble of word and character prediction models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fono-etal-2024-embible">
<titleInfo>
<title>Embible: Reconstruction of Ancient Hebrew and Aramaic Texts Using Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Niv</namePart>
<namePart type="family">Fono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harel</namePart>
<namePart type="family">Moshayof</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eldar</namePart>
<namePart type="family">Karol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Itai</namePart>
<namePart type="family">Assraf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Last</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hebrew and Aramaic inscriptions serve as an essential source of information on the ancient history of the Near East. Unfortunately, some parts of the inscribed texts become illegible over time. Special experts, called epigraphists, use time-consuming manual procedures to estimate the missing content. This problem can be considered an extended masked language modeling task, where the damaged content can comprise single characters, character n-grams (partial words), single complete words, and multi-word n-grams.This study is the first attempt to apply the masked language modeling approach to corrupted inscriptions in Hebrew and Aramaic languages, both using the Hebrew alphabet consisting mostly of consonant symbols. In our experiments, we evaluate several transformer-based models, which are fine-tuned on the Biblical texts and tested on three different percentages of randomly masked parts in the testing corpus. For any masking percentage, the highest text completion accuracy is obtained with a novel ensemble of word and character prediction models.</abstract>
<identifier type="citekey">fono-etal-2024-embible</identifier>
<location>
<url>https://aclanthology.org/2024.findings-eacl.56</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>846</start>
<end>852</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Embible: Reconstruction of Ancient Hebrew and Aramaic Texts Using Transformers
%A Fono, Niv
%A Moshayof, Harel
%A Karol, Eldar
%A Assraf, Itai
%A Last, Mark
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F fono-etal-2024-embible
%X Hebrew and Aramaic inscriptions serve as an essential source of information on the ancient history of the Near East. Unfortunately, some parts of the inscribed texts become illegible over time. Special experts, called epigraphists, use time-consuming manual procedures to estimate the missing content. This problem can be considered an extended masked language modeling task, where the damaged content can comprise single characters, character n-grams (partial words), single complete words, and multi-word n-grams.This study is the first attempt to apply the masked language modeling approach to corrupted inscriptions in Hebrew and Aramaic languages, both using the Hebrew alphabet consisting mostly of consonant symbols. In our experiments, we evaluate several transformer-based models, which are fine-tuned on the Biblical texts and tested on three different percentages of randomly masked parts in the testing corpus. For any masking percentage, the highest text completion accuracy is obtained with a novel ensemble of word and character prediction models.
%U https://aclanthology.org/2024.findings-eacl.56
%P 846-852
Markdown (Informal)
[Embible: Reconstruction of Ancient Hebrew and Aramaic Texts Using Transformers](https://aclanthology.org/2024.findings-eacl.56) (Fono et al., Findings 2024)
ACL