% AvarLab paper in the ComputEL-9 workshop proceedings (landing page is in the url field).
@inproceedings{zagidov-brochhagen-2026-avarlab,
    title     = {{A}var{L}ab: An Integrated Digital Ecosystem for {A}var, a Morphologically Rich Low-Resource Language},
    author    = {Zagidov, Kebed and
                 Brochhagen, Thomas},
    editor    = {Agyapong, Godfred and
                 Moeller, Sarah and
                 Arppe, Antti and
                 Marashian, Ali and
                 Rosenblum, Daisy},
    booktitle = {Proceedings of the Ninth Workshop on the Use of Computational Methods in the Study of Endangered Languages ({C}omput{EL}-9)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.computel-1.7/},
    pages     = {62--71},
    isbn      = {979-8-89176-422-4},
    abstract  = {This paper presents a digital ecosystem designed for Avar, a morphologically rich and vulnerable Northeast Caucasian language. Addressing the common bottleneck where lexical resources, corpora, and computational tools are developed in isolation or are entirely absent, we propose the ``generate-verify'' workflow. By developing a scalable, rule-based computational architecture, our system specifically targets the challenges of low-resource settings, overcoming data sparsity to generate over one million inflected forms from a static dictionary of 14,700 entries. Furthermore, by coupling morphological generation with corpus verification, we introduce a dynamic method to rapidly analyze and expand endangered language data. This approach transforms static linguistic documentation into active language reclamation tools, supporting dictionary lookup and the creation of silver-standard annotations for downstream NLP. The platform also serves as a unified model for the collection, management, and mobilization of fragmented language data, ensuring that the resulting resources are directly accessible and beneficial to the speaker community. Ultimately, AvarLab provides a practical, adaptable pathway for building sustainable digital infrastructure by fostering interaction among documentary linguists, computer scientists, and native speakers.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the AvarLab paper: paper-level metadata, the host
     proceedings (relatedItem type="host"), the abstract, and the page range. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zagidov-brochhagen-2026-avarlab">
    <titleInfo>
      <title>AvarLab: An Integrated Digital Ecosystem for Avar, a Morphologically Rich Low-Resource Language</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Kebed</namePart>
      <namePart type="family">Zagidov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Thomas</namePart>
      <namePart type="family">Brochhagen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth Workshop on the Use of Computational Methods in the Study of Endangered Languages (ComputEL-9)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Godfred</namePart>
        <namePart type="family">Agyapong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sarah</namePart>
        <namePart type="family">Moeller</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Antti</namePart>
        <namePart type="family">Arppe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ali</namePart>
        <namePart type="family">Marashian</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daisy</namePart>
        <namePart type="family">Rosenblum</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-422-4</identifier>
    </relatedItem>
    <abstract>This paper presents a digital ecosystem designed for Avar, a morphologically rich and vulnerable Northeast Caucasian language. Addressing the common bottleneck where lexical resources, corpora, and computational tools are developed in isolation or are entirely absent, we propose the “generate-verify” workflow. By developing a scalable, rule-based computational architecture, our system specifically targets the challenges of low-resource settings, overcoming data sparsity to generate over one million inflected forms from a static dictionary of 14,700 entries. Furthermore, by coupling morphological generation with corpus verification, we introduce a dynamic method to rapidly analyze and expand endangered language data. This approach transforms static linguistic documentation into active language reclamation tools, supporting dictionary lookup and the creation of silver-standard annotations for downstream NLP. The platform also serves as a unified model for the collection, management, and mobilization of fragmented language data, ensuring that the resulting resources are directly accessible and beneficial to the speaker community. Ultimately, AvarLab provides a practical, adaptable pathway for building sustainable digital infrastructure by fostering interaction among documentary linguists, computer scientists, and native speakers.</abstract>
    <identifier type="citekey">zagidov-brochhagen-2026-avarlab</identifier>
    <location>
      <url>https://aclanthology.org/2026.computel-1.7/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>62</start>
        <end>71</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T AvarLab: An Integrated Digital Ecosystem for Avar, a Morphologically Rich Low-Resource Language
%A Zagidov, Kebed
%A Brochhagen, Thomas
%Y Agyapong, Godfred
%Y Moeller, Sarah
%Y Arppe, Antti
%Y Marashian, Ali
%Y Rosenblum, Daisy
%S Proceedings of the Ninth Workshop on the Use of Computational Methods in the Study of Endangered Languages (ComputEL-9)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-422-4
%F zagidov-brochhagen-2026-avarlab
%X This paper presents a digital ecosystem designed for Avar, a morphologically rich and vulnerable Northeast Caucasian language. Addressing the common bottleneck where lexical resources, corpora, and computational tools are developed in isolation or are entirely absent, we propose the “generate-verify” workflow. By developing a scalable, rule-based computational architecture, our system specifically targets the challenges of low-resource settings, overcoming data sparsity to generate over one million inflected forms from a static dictionary of 14,700 entries. Furthermore, by coupling morphological generation with corpus verification, we introduce a dynamic method to rapidly analyze and expand endangered language data. This approach transforms static linguistic documentation into active language reclamation tools, supporting dictionary lookup and the creation of silver-standard annotations for downstream NLP. The platform also serves as a unified model for the collection, management, and mobilization of fragmented language data, ensuring that the resulting resources are directly accessible and beneficial to the speaker community. Ultimately, AvarLab provides a practical, adaptable pathway for building sustainable digital infrastructure by fostering interaction among documentary linguists, computer scientists, and native speakers.
%U https://aclanthology.org/2026.computel-1.7/
%P 62-71
Markdown (Informal)
[AvarLab: An Integrated Digital Ecosystem for Avar, a Morphologically Rich Low-Resource Language](https://aclanthology.org/2026.computel-1.7/) (Zagidov & Brochhagen, ComputEL 2026)
ACL