@inproceedings{mahmudi-etal-2025-neural,
title = "Can a Neural Model Guide Fieldwork? A Case Study on Morphological Data Collection",
author = "Mahmudi, Aso and
Herce, Borja and
Inostroza Am{\'e}stica, Demian and
Scherbakov, Andreas and
Hovy, Eduard H. and
Vylomova, Ekaterina",
editor = "Sharoff, Serge and
Terryn, Ayla Rigouts and
Zweigenbaum, Pierre and
Rapp, Reinhard",
booktitle = "Proceedings of the 18th Workshop on Building and Using Comparable Corpora (BUCC)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.bucc-1.8/",
pages = "62--72",
abstract = "Linguistic fieldwork is an important component in language documentation and the creation of comprehensive linguistic corpora. Despite its significance, the process is often lengthy, exhaustive, and time-consuming. This paper presents a novel model that guides a linguist during the fieldwork and accounts for the dynamics of linguist-speaker interactions. We introduce a novel framework that evaluates the efficiency of various sampling strategies for obtaining morphological data and assesses the effectiveness of state-of-the-art neural models in generalising morphological structures. Our experiments highlight two key strategies for improving the efficiency: (1) increasing the diversity of annotated data by uniform sampling among the cells of the paradigm tables, and (2) using model confidence as a guide to enhance positive interaction by providing reliable predictions during annotation."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahmudi-etal-2025-neural">
<titleInfo>
<title>Can a Neural Model Guide Fieldwork? A Case Study on Morphological Data Collection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aso</namePart>
<namePart type="family">Mahmudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Borja</namePart>
<namePart type="family">Herce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Demian</namePart>
<namePart type="family">Inostroza Améstica</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Scherbakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="given">H.</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Vylomova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Workshop on Building and Using Comparable Corpora (BUCC)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayla</namePart>
<namePart type="given">Rigouts</namePart>
<namePart type="family">Terryn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Zweigenbaum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reinhard</namePart>
<namePart type="family">Rapp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Linguistic fieldwork is an important component in language documentation and the creation of comprehensive linguistic corpora. Despite its significance, the process is often lengthy, exhaustive, and time-consuming. This paper presents a novel model that guides a linguist during the fieldwork and accounts for the dynamics of linguist-speaker interactions. We introduce a novel framework that evaluates the efficiency of various sampling strategies for obtaining morphological data and assesses the effectiveness of state-of-the-art neural models in generalising morphological structures. Our experiments highlight two key strategies for improving the efficiency: (1) increasing the diversity of annotated data by uniform sampling among the cells of the paradigm tables, and (2) using model confidence as a guide to enhance positive interaction by providing reliable predictions during annotation.</abstract>
<identifier type="citekey">mahmudi-etal-2025-neural</identifier>
<location>
<url>https://aclanthology.org/2025.bucc-1.8/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>62</start>
<end>72</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can a Neural Model Guide Fieldwork? A Case Study on Morphological Data Collection
%A Mahmudi, Aso
%A Herce, Borja
%A Inostroza Améstica, Demian
%A Scherbakov, Andreas
%A Hovy, Eduard H.
%A Vylomova, Ekaterina
%Y Sharoff, Serge
%Y Terryn, Ayla Rigouts
%Y Zweigenbaum, Pierre
%Y Rapp, Reinhard
%S Proceedings of the 18th Workshop on Building and Using Comparable Corpora (BUCC)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F mahmudi-etal-2025-neural
%X Linguistic fieldwork is an important component in language documentation and the creation of comprehensive linguistic corpora. Despite its significance, the process is often lengthy, exhaustive, and time-consuming. This paper presents a novel model that guides a linguist during the fieldwork and accounts for the dynamics of linguist-speaker interactions. We introduce a novel framework that evaluates the efficiency of various sampling strategies for obtaining morphological data and assesses the effectiveness of state-of-the-art neural models in generalising morphological structures. Our experiments highlight two key strategies for improving the efficiency: (1) increasing the diversity of annotated data by uniform sampling among the cells of the paradigm tables, and (2) using model confidence as a guide to enhance positive interaction by providing reliable predictions during annotation.
%U https://aclanthology.org/2025.bucc-1.8/
%P 62-72
Markdown (Informal)
[Can a Neural Model Guide Fieldwork? A Case Study on Morphological Data Collection](https://aclanthology.org/2025.bucc-1.8/) (Mahmudi et al., BUCC 2025)
ACL