@inproceedings{karaisl-2023-question,
title = "A Question of Confidence: Using {OCR} Technology for Script analysis",
author = "Karaisl, Antonia",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Pirinen, Flammie and
Alnajjar, Khalid and
Miyagawa, So and
Bizzoni, Yuri and
Partanen, Niko and
Rueter, Jack},
booktitle = "Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages",
month = dec,
year = "2023",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.nlp4dh-1.20",
pages = "162--171",
abstract = "The following article proposes a method employing the Tesseract OCR engine to aid palaeographic analysis and scribal identification. Repurposing the so-called confidence score provided by the OCR engine, different methods of visualization are used to surface differences between font families, script types and manuscript hands.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karaisl-2023-question">
<titleInfo>
<title>A Question of Confidence: Using OCR Technology for Script analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antonia</namePart>
<namePart type="family">Karaisl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Flammie</namePart>
<namePart type="family">Pirinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niko</namePart>
<namePart type="family">Partanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Rueter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The following article proposes a method employing the Tesseract OCR engine to aid palaeographic analysis and scribal identification. Repurposing the so-called confidence score provided by the OCR engine, different methods of visualization are used to surface differences between font families, script types and manuscript hands.</abstract>
<identifier type="citekey">karaisl-2023-question</identifier>
<location>
<url>https://aclanthology.org/2023.nlp4dh-1.20</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>162</start>
<end>171</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Question of Confidence: Using OCR Technology for Script analysis
%A Karaisl, Antonia
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Pirinen, Flammie
%Y Alnajjar, Khalid
%Y Miyagawa, So
%Y Bizzoni, Yuri
%Y Partanen, Niko
%Y Rueter, Jack
%S Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages
%D 2023
%8 December
%I Association for Computational Linguistics
%C Tokyo, Japan
%F karaisl-2023-question
%X The following article proposes a method employing the Tesseract OCR engine to aid palaeographic analysis and scribal identification. Repurposing the so-called confidence score provided by the OCR engine, different methods of visualization are used to surface differences between font families, script types and manuscript hands.
%U https://aclanthology.org/2023.nlp4dh-1.20
%P 162-171
Markdown (Informal)
[A Question of Confidence: Using OCR Technology for Script analysis](https://aclanthology.org/2023.nlp4dh-1.20) (Karaisl, NLP4DH-IWCLUL 2023)
ACL
- Antonia Karaisl. 2023. A Question of Confidence: Using OCR Technology for Script analysis. In Proceedings of the Joint 3rd International Conference on Natural Language Processing for Digital Humanities and 8th International Workshop on Computational Linguistics for Uralic Languages, pages 162–171, Tokyo, Japan. Association for Computational Linguistics.