@inproceedings{afanasev-2026-quantitative,
title = "Quantitative Lect Description: A Case Study of Lemko from the Field Data of 1920s-1930s",
author = "Afanasev, Ilia",
booktitle = "Proceedings of the Fifth Workshop on {NLP} Applications to Field Linguistics",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.fieldmatters-1.6/",
pages = "46--59",
abstract = "While qualitative descriptions (in the form of reference grammars) and benchmarks for low-resource languages are becoming increasingly widespread, computational linguists do not often use quantitative methods to describe a new lect rather than a new model. This paper intends to close this lacuna. The case study is a Lemko text transcribed at the beginning of the twentieth century. Using morphosyntactic tagging and topic modelling, the study demonstrates areal influences and archaic features of the lect. Fine-grained evaluation significantly assists in identifying subtle patterns that are not readily apparent through traditional metrics such as accuracy score. The results highlight the necessity of a more detailed analysis of model performance, which may yield more linguistically significant results than a purely manual check. This information is present in the resulting dataset, which can be used for further investigation into the structural features of the Lemko lect."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="afanasev-2026-quantitative">
<titleInfo>
<title>Quantitative Lect Description: A Case Study of Lemko from the Field Data of 1920s-1930s</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ilia</namePart>
<namePart type="family">Afanasev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP Applications to Field Linguistics</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While qualitative descriptions (in the form of reference grammars) and benchmarks for low-resource languages are becoming increasingly widespread, computational linguists do not often use quantitative methods to describe a new lect rather than a new model. This paper intends to close this lacuna. The case study is a Lemko text transcribed at the beginning of the twentieth century. Using morphosyntactic tagging and topic modelling, the study demonstrates areal influences and archaic features of the lect. Fine-grained evaluation significantly assists in identifying subtle patterns that are not readily apparent through traditional metrics such as accuracy score. The results highlight the necessity of a more detailed analysis of model performance, which may yield more linguistically significant results than a purely manual check. This information is present in the resulting dataset, which can be used for further investigation into the structural features of the Lemko lect.</abstract>
<identifier type="citekey">afanasev-2026-quantitative</identifier>
<location>
<url>https://aclanthology.org/2026.fieldmatters-1.6/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>46</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quantitative Lect Description: A Case Study of Lemko from the Field Data of 1920s-1930s
%A Afanasev, Ilia
%S Proceedings of the Fifth Workshop on NLP Applications to Field Linguistics
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F afanasev-2026-quantitative
%X While qualitative descriptions (in the form of reference grammars) and benchmarks for low-resource languages are becoming increasingly widespread, computational linguists do not often use quantitative methods to describe a new lect rather than a new model. This paper intends to close this lacuna. The case study is a Lemko text transcribed at the beginning of the twentieth century. Using morphosyntactic tagging and topic modelling, the study demonstrates areal influences and archaic features of the lect. Fine-grained evaluation significantly assists in identifying subtle patterns that are not readily apparent through traditional metrics such as accuracy score. The results highlight the necessity of a more detailed analysis of model performance, which may yield more linguistically significant results than a purely manual check. This information is present in the resulting dataset, which can be used for further investigation into the structural features of the Lemko lect.
%U https://aclanthology.org/2026.fieldmatters-1.6/
%P 46-59
Markdown (Informal)
[Quantitative Lect Description: A Case Study of Lemko from the Field Data of 1920s-1930s](https://aclanthology.org/2026.fieldmatters-1.6/) (Afanasev, FieldMatters 2026)
ACL