@inproceedings{asadpour-etal-2025-practical,
title = "A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukr{\={i}} {K}urdish",
author = "Asadpour, Hiwa and
Okabe, Shu and
Fraser, Alexander",
editor = "Le Ferrand, {\'E}ric and
Klyachko, Elena and
Postnikova, Anna and
Shavrina, Tatiana and
Serikov, Oleg and
Voloshina, Ekaterina and
Vylomova, Ekaterina",
booktitle = "Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.fieldmatters-1.6/",
pages = "65--75",
ISBN = "979-8-89176-282-4",
abstract = "Interlinear gloss generation aims to predict linguistic annotations (gloss) for a sentence in a language that is usually under ongoing documentation. Such output is a first draft for the linguist to work with and should reduce the manual workload. This article studies a simple glossing pipeline based on a Conditional Random Field and applies it to a small fieldwork corpus in Mukr{\={i}} Kurdish, a variety of Central Kurdish. We mainly focus on making the tool as accessible as possible for field linguists, so it can run on standard computers without the need for GPUs. Our pipeline predicts common grammatical patterns robustly and, more generally, frequent combinations of morphemes and glosses. Although more advanced neural models do reach better results, our feature-based system still manages to be competitive and to provide interpretability. To foster further collaboration between field linguistics and NLP, we also provide some recommendations regarding documentation endeavours and release our pipeline code alongside."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="asadpour-etal-2025-practical">
<titleInfo>
<title>A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukrī Kurdish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hiwa</namePart>
<namePart type="family">Asadpour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shu</namePart>
<namePart type="family">Okabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Fraser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Éric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Klyachko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Postnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Shavrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Serikov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Voloshina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-282-4</identifier>
</relatedItem>
<abstract>Interlinear gloss generation aims to predict linguistic annotations (gloss) for a sentence in a language that is usually under ongoing documentation. Such output is a first draft for the linguist to work with and should reduce the manual workload. This article studies a simple glossing pipeline based on a Conditional Random Field and applies it to a small fieldwork corpus in Mukrī Kurdish, a variety of Central Kurdish. We mainly focus on making the tool as accessible as possible for field linguists, so it can run on standard computers without the need for GPUs. Our pipeline predicts common grammatical patterns robustly and, more generally, frequent combinations of morphemes and glosses. Although more advanced neural models do reach better results, our feature-based system still manages to be competitive and to provide interpretability. To foster further collaboration between field linguistics and NLP, we also provide some recommendations regarding documentation endeavours and release our pipeline code alongside.</abstract>
<identifier type="citekey">asadpour-etal-2025-practical</identifier>
<location>
<url>https://aclanthology.org/2025.fieldmatters-1.6/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>65</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukrī Kurdish
%A Asadpour, Hiwa
%A Okabe, Shu
%A Fraser, Alexander
%Y Le Ferrand, Éric
%Y Klyachko, Elena
%Y Postnikova, Anna
%Y Shavrina, Tatiana
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Vylomova, Ekaterina
%S Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-282-4
%F asadpour-etal-2025-practical
%X Interlinear gloss generation aims to predict linguistic annotations (gloss) for a sentence in a language that is usually under ongoing documentation. Such output is a first draft for the linguist to work with and should reduce the manual workload. This article studies a simple glossing pipeline based on a Conditional Random Field and applies it to a small fieldwork corpus in Mukrī Kurdish, a variety of Central Kurdish. We mainly focus on making the tool as accessible as possible for field linguists, so it can run on standard computers without the need for GPUs. Our pipeline predicts common grammatical patterns robustly and, more generally, frequent combinations of morphemes and glosses. Although more advanced neural models do reach better results, our feature-based system still manages to be competitive and to provide interpretability. To foster further collaboration between field linguistics and NLP, we also provide some recommendations regarding documentation endeavours and release our pipeline code alongside.
%U https://aclanthology.org/2025.fieldmatters-1.6/
%P 65-75
Markdown (Informal)
[A Practical Tool to Help Automate Interlinear Glossing: a Study on Mukrī Kurdish](https://aclanthology.org/2025.fieldmatters-1.6/) (Asadpour et al., FieldMatters 2025)
ACL