@inproceedings{sakhovskiy-etal-2026-inksight,
title = "{I}nk{S}ight: Towards {AI}-Aided Historical Manuscript Analysis",
author = "Sakhovskiy, Andrey and
Ulitin, Ivan and
Bojarskaja, Emilia and
Kokh, Vladimir and
Murtazin, Ruslan and
Novopoltsev, Maxim and
Budennyy, Semen",
editor = "Croce, Danilo and
Leidner, Jochen and
Moosavi, Nafise Sadat",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-demo.20/",
pages = "271--281",
ISBN = "979-8-89176-382-1",
abstract = "Large-scale scientific research on historical documents {---} particularly medieval Arabic manuscripts {---} remains challenging due to the need for advanced paleographic and linguistic training, the large volume of hand-written materials, and the absence of assisting software. In this paper, we propose InkSight, the first end-to-end Arabic manuscript analysis tool for manuscript-based analytics and research hypothesis testing. InkSight integrates three key components: (i) an Optical Character Recognition (OCR) module utilizing a Large Visual Language Model (LVLM); (ii) a lightweight document indexing and information retrieval module that enables query-based evidence retrieval from book-length manuscripts; and (iii) a flexible Large Language Model (LLM) prompting interface factually grounded to the given manuscript via Retrieval-Augmented Generation (RAG). Empirical evaluation on the existing KITAB OCR benchmark and our in-house dataset of ancient Arabic manuscripts has revealed that historical research can be effectively supported using smaller fine-tuned LVLMs without relying on larger proprietary models. The live web demo for InkSight is available freely at: https://inksight.ru and the source code for InkSight is publicly available at Github: https://github.com/ds-hub-sochi/InkSight-tool."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sakhovskiy-etal-2026-inksight">
<titleInfo>
<title>InkSight: Towards AI-Aided Historical Manuscript Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrey</namePart>
<namePart type="family">Sakhovskiy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Ulitin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emilia</namePart>
<namePart type="family">Bojarskaja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Kokh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Murtazin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxim</namePart>
<namePart type="family">Novopoltsev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Semen</namePart>
<namePart type="family">Budennyy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jochen</namePart>
<namePart type="family">Leidner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-382-1</identifier>
</relatedItem>
<abstract>Large-scale scientific research on historical documents — particularly medieval Arabic manuscripts — remains challenging due to the need for advanced paleographic and linguistic training, the large volume of hand-written materials, and the absence of assisting software. In this paper, we propose InkSight, the first end-to-end Arabic manuscript analysis tool for manuscript-based analytics and research hypothesis testing. InkSight integrates three key components: (i) an Optical Character Recognition (OCR) module utilizing a Large Visual Language Model (LVLM); (ii) a lightweight document indexing and information retrieval module that enables query-based evidence retrieval from book-length manuscripts; and (iii) a flexible Large Language Model (LLM) prompting interface factually grounded to the given manuscript via Retrieval-Augmented Generation (RAG). Empirical evaluation on the existing KITAB OCR benchmark and our in-house dataset of ancient Arabic manuscripts has revealed that historical research can be effectively supported using smaller fine-tuned LVLMs without relying on larger proprietary models. The live web demo for InkSight is available freely at: https://inksight.ru and the source code for InkSight is publicly available at Github: https://github.com/ds-hub-sochi/InkSight-tool.</abstract>
<identifier type="citekey">sakhovskiy-etal-2026-inksight</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-demo.20/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>271</start>
<end>281</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T InkSight: Towards AI-Aided Historical Manuscript Analysis
%A Sakhovskiy, Andrey
%A Ulitin, Ivan
%A Bojarskaja, Emilia
%A Kokh, Vladimir
%A Murtazin, Ruslan
%A Novopoltsev, Maxim
%A Budennyy, Semen
%Y Croce, Danilo
%Y Leidner, Jochen
%Y Moosavi, Nafise Sadat
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-382-1
%F sakhovskiy-etal-2026-inksight
%X Large-scale scientific research on historical documents — particularly medieval Arabic manuscripts — remains challenging due to the need for advanced paleographic and linguistic training, the large volume of hand-written materials, and the absence of assisting software. In this paper, we propose InkSight, the first end-to-end Arabic manuscript analysis tool for manuscript-based analytics and research hypothesis testing. InkSight integrates three key components: (i) an Optical Character Recognition (OCR) module utilizing a Large Visual Language Model (LVLM); (ii) a lightweight document indexing and information retrieval module that enables query-based evidence retrieval from book-length manuscripts; and (iii) a flexible Large Language Model (LLM) prompting interface factually grounded to the given manuscript via Retrieval-Augmented Generation (RAG). Empirical evaluation on the existing KITAB OCR benchmark and our in-house dataset of ancient Arabic manuscripts has revealed that historical research can be effectively supported using smaller fine-tuned LVLMs without relying on larger proprietary models. The live web demo for InkSight is available freely at: https://inksight.ru and the source code for InkSight is publicly available at Github: https://github.com/ds-hub-sochi/InkSight-tool.
%U https://aclanthology.org/2026.eacl-demo.20/
%P 271-281
Markdown (Informal)
[InkSight: Towards AI-Aided Historical Manuscript Analysis](https://aclanthology.org/2026.eacl-demo.20/) (Sakhovskiy et al., EACL 2026)
ACL
- Andrey Sakhovskiy, Ivan Ulitin, Emilia Bojarskaja, Vladimir Kokh, Ruslan Murtazin, Maxim Novopoltsev, and Semen Budennyy. 2026. InkSight: Towards AI-Aided Historical Manuscript Analysis. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 271–281, Rabat, Morocco. Association for Computational Linguistics.