@inproceedings{le-ferrand-etal-2022-learning,
title = "Learning From Failure: Data Capture in an {A}ustralian Aboriginal Community",
author = "Le Ferrand, Eric and
Bird, Steven and
Besacier, Laurent",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.342",
doi = "10.18653/v1/2022.acl-long.342",
pages = "4988--4998",
abstract = "Most low resource language technology development is premised on the need to collect data for training statistical models. When we follow the typical process of recording and transcribing text for small Indigenous languages, we hit up against the so-called {``}transcription bottleneck.{''} Therefore it is worth exploring new ways of engaging with speakers which generate data while avoiding the transcription bottleneck. We have deployed a prototype app for speakers to use for confirming system guesses in an approach to transcription based on word spotting. However, in the process of testing the app we encountered many new problems for engagement with speakers. This paper presents a close-up study of the process of deploying data capture technology on the ground in an Australian Aboriginal community. We reflect on our interactions with participants and draw lessons that apply to anyone seeking to develop methods for language data collection in an Indigenous community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="le-ferrand-etal-2022-learning">
<titleInfo>
<title>Learning From Failure: Data Capture in an Australian Aboriginal Community</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Le Ferrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bird</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Besacier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most low resource language technology development is premised on the need to collect data for training statistical models. When we follow the typical process of recording and transcribing text for small Indigenous languages, we hit up against the so-called “transcription bottleneck.” Therefore it is worth exploring new ways of engaging with speakers which generate data while avoiding the transcription bottleneck. We have deployed a prototype app for speakers to use for confirming system guesses in an approach to transcription based on word spotting. However, in the process of testing the app we encountered many new problems for engagement with speakers. This paper presents a close-up study of the process of deploying data capture technology on the ground in an Australian Aboriginal community. We reflect on our interactions with participants and draw lessons that apply to anyone seeking to develop methods for language data collection in an Indigenous community.</abstract>
<identifier type="citekey">le-ferrand-etal-2022-learning</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.342</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.342</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>4988</start>
<end>4998</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning From Failure: Data Capture in an Australian Aboriginal Community
%A Le Ferrand, Eric
%A Bird, Steven
%A Besacier, Laurent
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F le-ferrand-etal-2022-learning
%X Most low resource language technology development is premised on the need to collect data for training statistical models. When we follow the typical process of recording and transcribing text for small Indigenous languages, we hit up against the so-called “transcription bottleneck.” Therefore it is worth exploring new ways of engaging with speakers which generate data while avoiding the transcription bottleneck. We have deployed a prototype app for speakers to use for confirming system guesses in an approach to transcription based on word spotting. However, in the process of testing the app we encountered many new problems for engagement with speakers. This paper presents a close-up study of the process of deploying data capture technology on the ground in an Australian Aboriginal community. We reflect on our interactions with participants and draw lessons that apply to anyone seeking to develop methods for language data collection in an Indigenous community.
%R 10.18653/v1/2022.acl-long.342
%U https://aclanthology.org/2022.acl-long.342
%U https://doi.org/10.18653/v1/2022.acl-long.342
%P 4988-4998
Markdown (Informal)
[Learning From Failure: Data Capture in an Australian Aboriginal Community](https://aclanthology.org/2022.acl-long.342) (Le Ferrand et al., ACL 2022)
ACL