@inproceedings{coltekin-etal-2026-tokenisation,
title = "Tokenisation of {T}urkic Copula Constructions in {U}niversal {D}ependencies",
author = "Coltekin, Cagri and
Akkurt, Furkan and
Chontaeva, Bermet and
Eslami, Soudabeh and
Ivanova, Sardana and
Dzhumalieva, Gulnura and
Kasieva, Aida and
Mus, Nikolett and
Washington, Jonathan",
editor = {Oflazer, Kemal and
K{\"o}ksal, Abdullatif and
Varol, Onur},
booktitle = "Proceedings of the Second Workshop Natural Language Processing for {T}urkic Languages ({SIGTURK} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.sigturk-1.14/",
pages = "172--178",
ISBN = "979-8-89176-370-8",
abstract = "Identifying units, `syntactic words', for morphosyntactic analysis is important yet challenging for morphologically rich languages. In this paper we propose a set of guiding principles to determine units of morphosyntactic analysis, and apply them to the case of copular constructions in Turkic languages, in the context of Universal Dependencies (UD) framework. We also provide a survey of the practice in the Turkic UD treebanks published to date, and discuss the advantages and disadvantages of the proposed tokenisation for a selection of Turkic languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coltekin-etal-2026-tokenisation">
<titleInfo>
<title>Tokenisation of Turkic Copula Constructions in Universal Dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cagri</namePart>
<namePart type="family">Coltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Furkan</namePart>
<namePart type="family">Akkurt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bermet</namePart>
<namePart type="family">Chontaeva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soudabeh</namePart>
<namePart type="family">Eslami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sardana</namePart>
<namePart type="family">Ivanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gulnura</namePart>
<namePart type="family">Dzhumalieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Kasieva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolett</namePart>
<namePart type="family">Mus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Washington</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kemal</namePart>
<namePart type="family">Oflazer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdullatif</namePart>
<namePart type="family">Köksal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Onur</namePart>
<namePart type="family">Varol</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-370-8</identifier>
</relatedItem>
<abstract>Identifying units, ‘syntactic words’, for morphosyntactic analysis is important yet challenging for morphologically rich languages. In this paper we propose a set of guiding principles to determine units of morphosyntactic analysis, and apply them to the case of copular constructions in Turkic languages, in the context of Universal Dependencies (UD) framework. We also provide a survey of the practice in the Turkic UD treebanks published to date, and discuss the advantages and disadvantages of the proposed tokenisation for a selection of Turkic languages.</abstract>
<identifier type="citekey">coltekin-etal-2026-tokenisation</identifier>
<location>
<url>https://aclanthology.org/2026.sigturk-1.14/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>172</start>
<end>178</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tokenisation of Turkic Copula Constructions in Universal Dependencies
%A Coltekin, Cagri
%A Akkurt, Furkan
%A Chontaeva, Bermet
%A Eslami, Soudabeh
%A Ivanova, Sardana
%A Dzhumalieva, Gulnura
%A Kasieva, Aida
%A Mus, Nikolett
%A Washington, Jonathan
%Y Oflazer, Kemal
%Y Köksal, Abdullatif
%Y Varol, Onur
%S Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-370-8
%F coltekin-etal-2026-tokenisation
%X Identifying units, ‘syntactic words’, for morphosyntactic analysis is important yet challenging for morphologically rich languages. In this paper we propose a set of guiding principles to determine units of morphosyntactic analysis, and apply them to the case of copular constructions in Turkic languages, in the context of Universal Dependencies (UD) framework. We also provide a survey of the practice in the Turkic UD treebanks published to date, and discuss the advantages and disadvantages of the proposed tokenisation for a selection of Turkic languages.
%U https://aclanthology.org/2026.sigturk-1.14/
%P 172-178
Markdown (Informal)
[Tokenisation of Turkic Copula Constructions in Universal Dependencies](https://aclanthology.org/2026.sigturk-1.14/) (Coltekin et al., SIGTURK 2026)
ACL
- Cagri Coltekin, Furkan Akkurt, Bermet Chontaeva, Soudabeh Eslami, Sardana Ivanova, Gulnura Dzhumalieva, Aida Kasieva, Nikolett Mus, and Jonathan Washington. 2026. Tokenisation of Turkic Copula Constructions in Universal Dependencies. In Proceedings of the Second Workshop Natural Language Processing for Turkic Languages (SIGTURK 2026), pages 172–178, Rabat, Morocco. Association for Computational Linguistics.