@inproceedings{ozates-etal-2024-dependency,
title = "Dependency Annotation of {O}ttoman {T}urkish with Multilingual {BERT}",
author = {{\"O}zate{\c{s}}, {\c{S}}aziye and
T{\i}ra{\c{s}}, Tar{\i}k and
Gen{\c{c}}, Efe and
Bilgin Tasdemir, Esma},
editor = "Henning, Sophie and
Stede, Manfred",
booktitle = "Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.law-1.18",
pages = "188--196",
abstract = "This study introduces a pretrained large language model-based annotation methodology of the first dependency treebank in Ottoman Turkish. Our experimental results show that, through iteratively i) pseudo-annotating data using a multilingual BERT-based parsing model, ii) manually correcting the pseudo-annotations, and iii) fine-tuning the parsing model with the corrected annotations, we speed up and simplify the challenging dependency annotation process. The resulting treebank, that will be a part of the Universal Dependencies (UD) project, will facilitate automated analysis of Ottoman Turkish documents, unlocking the linguistic richness embedded in this historical heritage.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ozates-etal-2024-dependency">
<titleInfo>
<title>Dependency Annotation of Ottoman Turkish with Multilingual BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Şaziye</namePart>
<namePart type="family">Özateş</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tarık</namePart>
<namePart type="family">Tıraş</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Efe</namePart>
<namePart type="family">Genç</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Esma</namePart>
<namePart type="family">Bilgin Tasdemir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sophie</namePart>
<namePart type="family">Henning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manfred</namePart>
<namePart type="family">Stede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study introduces a pretrained large language model-based annotation methodology of the first dependency treebank in Ottoman Turkish. Our experimental results show that, through iteratively i) pseudo-annotating data using a multilingual BERT-based parsing model, ii) manually correcting the pseudo-annotations, and iii) fine-tuning the parsing model with the corrected annotations, we speed up and simplify the challenging dependency annotation process. The resulting treebank, that will be a part of the Universal Dependencies (UD) project, will facilitate automated analysis of Ottoman Turkish documents, unlocking the linguistic richness embedded in this historical heritage.</abstract>
<identifier type="citekey">ozates-etal-2024-dependency</identifier>
<location>
<url>https://aclanthology.org/2024.law-1.18</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>188</start>
<end>196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dependency Annotation of Ottoman Turkish with Multilingual BERT
%A Özateş, Şaziye
%A Tıraş, Tarık
%A Genç, Efe
%A Bilgin Tasdemir, Esma
%Y Henning, Sophie
%Y Stede, Manfred
%S Proceedings of The 18th Linguistic Annotation Workshop (LAW-XVIII)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F ozates-etal-2024-dependency
%X This study introduces a pretrained large language model-based annotation methodology of the first dependency treebank in Ottoman Turkish. Our experimental results show that, through iteratively i) pseudo-annotating data using a multilingual BERT-based parsing model, ii) manually correcting the pseudo-annotations, and iii) fine-tuning the parsing model with the corrected annotations, we speed up and simplify the challenging dependency annotation process. The resulting treebank, that will be a part of the Universal Dependencies (UD) project, will facilitate automated analysis of Ottoman Turkish documents, unlocking the linguistic richness embedded in this historical heritage.
%U https://aclanthology.org/2024.law-1.18
%P 188-196
Markdown (Informal)
[Dependency Annotation of Ottoman Turkish with Multilingual BERT](https://aclanthology.org/2024.law-1.18) (Özateş et al., LAW-WS 2024)
ACL