@inproceedings{veitsman-hartmann-2025-recent,
title = "Recent Advancements and Challenges of {T}urkic {C}entral {A}sian Language Processing",
author = "Veitsman, Yana and
Hartmann, Mareike",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.loreslm-1.25/",
pages = "309--324",
abstract = "Research in NLP for Central Asian Turkic languages - Kazakh, Uzbek, Kyrgyz, and Turkmen - faces typical low-resource language challenges like data scarcity, limited linguistic resources and technology development. However, recent advancements have included the collection of language-specific datasets and the development of models for downstream tasks. Thus, this paper aims to summarize recent progress and identify future research directions. It provides a high-level overview of each language`s linguistic features, the current technology landscape, the application of transfer learning from higher-resource languages, and the availability of labeled and unlabeled data. By outlining the current state, we hope to inspire and facilitate future research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="veitsman-hartmann-2025-recent">
<titleInfo>
<title>Recent Advancements and Challenges of Turkic Central Asian Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yana</namePart>
<namePart type="family">Veitsman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mareike</namePart>
<namePart type="family">Hartmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Models for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Research in NLP for Central Asian Turkic languages - Kazakh, Uzbek, Kyrgyz, and Turkmen - faces typical low-resource language challenges like data scarcity, limited linguistic resources and technology development. However, recent advancements have included the collection of language-specific datasets and the development of models for downstream tasks. Thus, this paper aims to summarize recent progress and identify future research directions. It provides a high-level overview of each language‘s linguistic features, the current technology landscape, the application of transfer learning from higher-resource languages, and the availability of labeled and unlabeled data. By outlining the current state, we hope to inspire and facilitate future research.</abstract>
<identifier type="citekey">veitsman-hartmann-2025-recent</identifier>
<location>
<url>https://aclanthology.org/2025.loreslm-1.25/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>309</start>
<end>324</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Recent Advancements and Challenges of Turkic Central Asian Language Processing
%A Veitsman, Yana
%A Hartmann, Mareike
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the First Workshop on Language Models for Low-Resource Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F veitsman-hartmann-2025-recent
%X Research in NLP for Central Asian Turkic languages - Kazakh, Uzbek, Kyrgyz, and Turkmen - faces typical low-resource language challenges like data scarcity, limited linguistic resources and technology development. However, recent advancements have included the collection of language-specific datasets and the development of models for downstream tasks. Thus, this paper aims to summarize recent progress and identify future research directions. It provides a high-level overview of each language‘s linguistic features, the current technology landscape, the application of transfer learning from higher-resource languages, and the availability of labeled and unlabeled data. By outlining the current state, we hope to inspire and facilitate future research.
%U https://aclanthology.org/2025.loreslm-1.25/
%P 309-324
Markdown (Informal)
[Recent Advancements and Challenges of Turkic Central Asian Language Processing](https://aclanthology.org/2025.loreslm-1.25/) (Veitsman & Hartmann, LoResLM 2025)
ACL