@inproceedings{faytak-etal-2026-short,
title = "Short-form verbal arts as a speech data resource in the field",
author = "Faytak, Matthew and
Yang, Tianle and
Akumbu, Pius Wuchu and
Njuasi, Ivo Forghema and
Le Ferrand, {\'E}ric",
booktitle = "Proceedings of the Fifth Workshop on {NLP} Applications to Field Linguistics",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.fieldmatters-1.5/",
pages = "38--45",
abstract = "We propose a method for efficient field data collection of speech resource data which leverages short-form verbal arts, namely riddles and proverbs, which permit a predictable transcript to be assigned to naturalistic but conventionalized utterances. As a proof of concept, we describe a 5.25 hour corpus of proverbs and riddles collected for Kom, a low-resource language of Cameroon, and conduct ASR modeling experiments on the corpus. Results suggest that the method yields high quality speech data, albeit with relatively low lexical diversity. We highlight the alignment of the collected data with community priorities for cultural education and preservation in the Cameroonian context."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="faytak-etal-2026-short">
<titleInfo>
<title>Short-form verbal arts as a speech data resource in the field</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Faytak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianle</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pius</namePart>
<namePart type="given">Wuchu</namePart>
<namePart type="family">Akumbu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivo</namePart>
<namePart type="given">Forghema</namePart>
<namePart type="family">Njuasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Éric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP Applications to Field Linguistics</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a method for efficient field data collection of speech resource data which leverages short-form verbal arts, namely riddles and proverbs, which permit a predictable transcript to be assigned to naturalistic but conventionalized utterances. As a proof of concept, we describe a 5.25 hour corpus of proverbs and riddles collected for Kom, a low-resource language of Cameroon, and conduct ASR modeling experiments on the corpus. Results suggest that the method yields high quality speech data, albeit with relatively low lexical diversity. We highlight the alignment of the collected data with community priorities for cultural education and preservation in the Cameroonian context.</abstract>
<identifier type="citekey">faytak-etal-2026-short</identifier>
<location>
<url>https://aclanthology.org/2026.fieldmatters-1.5/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>38</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Short-form verbal arts as a speech data resource in the field
%A Faytak, Matthew
%A Yang, Tianle
%A Akumbu, Pius Wuchu
%A Njuasi, Ivo Forghema
%A Le Ferrand, Éric
%S Proceedings of the Fifth Workshop on NLP Applications to Field Linguistics
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F faytak-etal-2026-short
%X We propose a method for efficient field data collection of speech resource data which leverages short-form verbal arts, namely riddles and proverbs, which permit a predictable transcript to be assigned to naturalistic but conventionalized utterances. As a proof of concept, we describe a 5.25 hour corpus of proverbs and riddles collected for Kom, a low-resource language of Cameroon, and conduct ASR modeling experiments on the corpus. Results suggest that the method yields high quality speech data, albeit with relatively low lexical diversity. We highlight the alignment of the collected data with community priorities for cultural education and preservation in the Cameroonian context.
%U https://aclanthology.org/2026.fieldmatters-1.5/
%P 38-45
Markdown (Informal)
[Short-form verbal arts as a speech data resource in the field](https://aclanthology.org/2026.fieldmatters-1.5/) (Faytak et al., FieldMatters 2026)
ACL
- Matthew Faytak, Tianle Yang, Pius Wuchu Akumbu, Ivo Forghema Njuasi, and Éric Le Ferrand. 2026. Short-form verbal arts as a speech data resource in the field. In Proceedings of the Fifth Workshop on NLP Applications to Field Linguistics, pages 38–45, Rabat, Morocco. Association for Computational Linguistics.