@inproceedings{copot-etal-2022-word,
title = "A Word-and-Paradigm Workflow for Fieldwork Annotation",
author = "Copot, Maria and
Court, Sara and
Diewald, Noah and
Antetomaso, Stephanie and
Elsner, Micha",
editor = "Moeller, Sarah and
Anastasopoulos, Antonios and
Arppe, Antti and
Chaudhary, Aditi and
Harrigan, Atticus and
Holden, Josh and
Lachler, Jordan and
Palmer, Alexis and
Rijhwani, Shruti and
Schwartz, Lane",
booktitle = "Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.computel-1.20/",
doi = "10.18653/v1/2022.computel-1.20",
pages = "159--169",
abstract = "There are many challenges in morphological fieldwork annotation, it heavily relies on segmentation and feature labeling (which have both practical and theoretical drawbacks), it`s time-intensive, and the annotator needs to be linguistically trained and may still annotate things inconsistently. We propose a workflow that relies on unsupervised and active learning grounded in Word-and-Paradigm morphology (WP). Machine learning has the potential to greatly accelerate the annotation process and allow a human annotator to focus on problematic cases, while the WP approach makes for an annotation system that is word-based and relational, removing the need to make decisions about feature labeling and segmentation early in the process and allowing speakers of the language of interest to participate more actively, since linguistic training is not necessary. We present a proof-of-concept for the first step of the workflow, in a realistic fieldwork setting, annotators can process hundreds of forms per hour."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="copot-etal-2022-word">
<titleInfo>
<title>A Word-and-Paradigm Workflow for Fieldwork Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Copot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Court</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noah</namePart>
<namePart type="family">Diewald</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Antetomaso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Micha</namePart>
<namePart type="family">Elsner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antti</namePart>
<namePart type="family">Arppe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditi</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atticus</namePart>
<namePart type="family">Harrigan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josh</namePart>
<namePart type="family">Holden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Lachler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lane</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>There are many challenges in morphological fieldwork annotation, it heavily relies on segmentation and feature labeling (which have both practical and theoretical drawbacks), it‘s time-intensive, and the annotator needs to be linguistically trained and may still annotate things inconsistently. We propose a workflow that relies on unsupervised and active learning grounded in Word-and-Paradigm morphology (WP). Machine learning has the potential to greatly accelerate the annotation process and allow a human annotator to focus on problematic cases, while the WP approach makes for an annotation system that is word-based and relational, removing the need to make decisions about feature labeling and segmentation early in the process and allowing speakers of the language of interest to participate more actively, since linguistic training is not necessary. We present a proof-of-concept for the first step of the workflow, in a realistic fieldwork setting, annotators can process hundreds of forms per hour.</abstract>
<identifier type="citekey">copot-etal-2022-word</identifier>
<identifier type="doi">10.18653/v1/2022.computel-1.20</identifier>
<location>
<url>https://aclanthology.org/2022.computel-1.20/</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>159</start>
<end>169</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Word-and-Paradigm Workflow for Fieldwork Annotation
%A Copot, Maria
%A Court, Sara
%A Diewald, Noah
%A Antetomaso, Stephanie
%A Elsner, Micha
%Y Moeller, Sarah
%Y Anastasopoulos, Antonios
%Y Arppe, Antti
%Y Chaudhary, Aditi
%Y Harrigan, Atticus
%Y Holden, Josh
%Y Lachler, Jordan
%Y Palmer, Alexis
%Y Rijhwani, Shruti
%Y Schwartz, Lane
%S Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F copot-etal-2022-word
%X There are many challenges in morphological fieldwork annotation, it heavily relies on segmentation and feature labeling (which have both practical and theoretical drawbacks), it‘s time-intensive, and the annotator needs to be linguistically trained and may still annotate things inconsistently. We propose a workflow that relies on unsupervised and active learning grounded in Word-and-Paradigm morphology (WP). Machine learning has the potential to greatly accelerate the annotation process and allow a human annotator to focus on problematic cases, while the WP approach makes for an annotation system that is word-based and relational, removing the need to make decisions about feature labeling and segmentation early in the process and allowing speakers of the language of interest to participate more actively, since linguistic training is not necessary. We present a proof-of-concept for the first step of the workflow, in a realistic fieldwork setting, annotators can process hundreds of forms per hour.
%R 10.18653/v1/2022.computel-1.20
%U https://aclanthology.org/2022.computel-1.20/
%U https://doi.org/10.18653/v1/2022.computel-1.20
%P 159-169
Markdown (Informal)
[A Word-and-Paradigm Workflow for Fieldwork Annotation](https://aclanthology.org/2022.computel-1.20/) (Copot et al., ComputEL 2022)
ACL
- Maria Copot, Sara Court, Noah Diewald, Stephanie Antetomaso, and Micha Elsner. 2022. A Word-and-Paradigm Workflow for Fieldwork Annotation. In Proceedings of the Fifth Workshop on the Use of Computational Methods in the Study of Endangered Languages, pages 159–169, Dublin, Ireland. Association for Computational Linguistics.