@inproceedings{bejarano-etal-2022-perusil,
title = "{P}eru{SIL}: A Framework to Build a Continuous {P}eruvian {S}ign {L}anguage Interpretation Dataset",
author = "Bejarano, Gissella and
Huamani-Malca, Joe and
Cerna-Herrera, Francisco and
Alva-Manchego, Fernando and
Rivas, Pablo",
editor = "Efthimiou, Eleni and
Fotinea, Stavroula-Evita and
Hanke, Thomas and
Hochgesang, Julie A. and
Kristoffersen, Jette and
Mesch, Johanna and
Schulder, Marc",
booktitle = "Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.signlang-1.1",
pages = "1--8",
    abstract = "Video-based datasets for Continuous Sign Language are scarce due to the challenging task of recording videos from native signers and the small number of people who can annotate sign language. COVID-19 has evidenced the key role of sign language interpreters in delivering nationwide health messages to deaf communities. In this paper, we present a framework for creating a multi-modal sign language interpretation dataset based on videos, and we use it to create the first dataset for Peruvian Sign Language (LSP) interpretation, annotated by hearing volunteers with intermediate knowledge of LSP who are guided by the video audio. We rely on hearing people to produce a first version of the annotations, which should be reviewed by native signers in the future. Our contributions are: i) we design a framework to annotate a sign language dataset; ii) we release the first annotated LSP multi-modal interpretation dataset (AEC); iii) we evaluate the annotations made by hearing people by training a sign language recognition model. Our model reaches up to 80.3{\%} accuracy among a minimum of five classes (signs) in the AEC dataset, and 52.4{\%} in a second dataset. Nevertheless, per-subject analysis of the second dataset shows variations worth discussing.",
}
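
To consume this record programmatically, a minimal sketch follows. It assumes the entry above is saved as perusil.bib (a hypothetical file name) and uses the third-party bibtexparser package (v1 API); common_strings=True resolves the unquoted month macro `jun`.

    # Minimal sketch: load the BibTeX record above from a hypothetical file.
    import bibtexparser
    from bibtexparser.bparser import BibTexParser

    with open("perusil.bib") as f:
        # common_strings=True lets the parser resolve month macros such as `jun`.
        db = bibtexparser.load(f, parser=BibTexParser(common_strings=True))

    entry = db.entries[0]          # each entry is a plain dict of fields
    print(entry["ID"])             # bejarano-etal-2022-perusil
    print(entry["title"])          # title with the original {P}rotective braces
    print(entry["author"])         # authors joined with " and "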
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bejarano-etal-2022-perusil">
    <titleInfo>
      <title>PeruSIL: A Framework to Build a Continuous Peruvian Sign Language Interpretation Dataset</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Gissella</namePart>
      <namePart type="family">Bejarano</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joe</namePart>
      <namePart type="family">Huamani-Malca</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Francisco</namePart>
      <namePart type="family">Cerna-Herrera</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fernando</namePart>
      <namePart type="family">Alva-Manchego</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pablo</namePart>
      <namePart type="family">Rivas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Eleni</namePart>
        <namePart type="family">Efthimiou</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stavroula-Evita</namePart>
        <namePart type="family">Fotinea</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thomas</namePart>
        <namePart type="family">Hanke</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Julie</namePart>
        <namePart type="given">A</namePart>
        <namePart type="family">Hochgesang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jette</namePart>
        <namePart type="family">Kristoffersen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Johanna</namePart>
        <namePart type="family">Mesch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marc</namePart>
        <namePart type="family">Schulder</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Video-based datasets for Continuous Sign Language are scarce due to the challenging task of recording videos from native signers and the small number of people who can annotate sign language. COVID-19 has evidenced the key role of sign language interpreters in delivering nationwide health messages to deaf communities. In this paper, we present a framework for creating a multi-modal sign language interpretation dataset based on videos, and we use it to create the first dataset for Peruvian Sign Language (LSP) interpretation, annotated by hearing volunteers with intermediate knowledge of LSP who are guided by the video audio. We rely on hearing people to produce a first version of the annotations, which should be reviewed by native signers in the future. Our contributions are: i) we design a framework to annotate a sign language dataset; ii) we release the first annotated LSP multi-modal interpretation dataset (AEC); iii) we evaluate the annotations made by hearing people by training a sign language recognition model. Our model reaches up to 80.3% accuracy among a minimum of five classes (signs) in the AEC dataset, and 52.4% in a second dataset. Nevertheless, per-subject analysis of the second dataset shows variations worth discussing.</abstract>
    <identifier type="citekey">bejarano-etal-2022-perusil</identifier>
    <location>
      <url>https://aclanthology.org/2022.signlang-1.1</url>
    </location>
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>1</start>
        <end>8</end>
      </extent>
    </part>
  </mods>
</modsCollection>
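
The MODS record can be read with Python's standard xml.etree.ElementTree; the sketch below assumes it is saved as perusil.xml (a hypothetical file name). Note that only the five author <name> elements are direct children of <mods>; the editors sit under <relatedItem> and are not matched here.

    # Minimal sketch: pull title, authors, and pages out of the MODS record.
    import xml.etree.ElementTree as ET

    NS = {"m": "http://www.loc.gov/mods/v3"}  # default namespace of the record

    mods = ET.parse("perusil.xml").getroot().find("m:mods", NS)

    title = mods.find("m:titleInfo/m:title", NS).text
    # Each author <name> holds given/family <namePart> children in order.
    authors = [
        " ".join(p.text for p in name.findall("m:namePart", NS))
        for name in mods.findall("m:name", NS)
    ]
    extent = mods.find("m:part/m:extent", NS)
    print(title)
    print(authors)
    print(extent.find("m:start", NS).text, "-", extent.find("m:end", NS).text)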
%0 Conference Proceedings
%T PeruSIL: A Framework to Build a Continuous Peruvian Sign Language Interpretation Dataset
%A Bejarano, Gissella
%A Huamani-Malca, Joe
%A Cerna-Herrera, Francisco
%A Alva-Manchego, Fernando
%A Rivas, Pablo
%Y Efthimiou, Eleni
%Y Fotinea, Stavroula-Evita
%Y Hanke, Thomas
%Y Hochgesang, Julie A.
%Y Kristoffersen, Jette
%Y Mesch, Johanna
%Y Schulder, Marc
%S Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F bejarano-etal-2022-perusil
%X Video-based datasets for Continuous Sign Language are scarce due to the challenging task of recording videos from native signers and the small number of people who can annotate sign language. COVID-19 has evidenced the key role of sign language interpreters in delivering nationwide health messages to deaf communities. In this paper, we present a framework for creating a multi-modal sign language interpretation dataset based on videos, and we use it to create the first dataset for Peruvian Sign Language (LSP) interpretation, annotated by hearing volunteers with intermediate knowledge of LSP who are guided by the video audio. We rely on hearing people to produce a first version of the annotations, which should be reviewed by native signers in the future. Our contributions are: i) we design a framework to annotate a sign language dataset; ii) we release the first annotated LSP multi-modal interpretation dataset (AEC); iii) we evaluate the annotations made by hearing people by training a sign language recognition model. Our model reaches up to 80.3% accuracy among a minimum of five classes (signs) in the AEC dataset, and 52.4% in a second dataset. Nevertheless, per-subject analysis of the second dataset shows variations worth discussing.
%U https://aclanthology.org/2022.signlang-1.1
%P 1-8
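
The Endnote record is a line-oriented %-tag format in which %A and %Y repeat once per author and editor. A minimal parsing sketch, assuming the record is saved as perusil.enw (a hypothetical file name):

    # Minimal sketch: fold the %-tagged Endnote record into a tag -> values map.
    from collections import defaultdict

    fields = defaultdict(list)  # lists, because %A and %Y occur multiple times
    with open("perusil.enw") as f:
        for line in f:
            if line.startswith("%"):
                tag, _, value = line.rstrip("\n").partition(" ")
                fields[tag].append(value)

    print(fields["%T"][0])  # title
    print(fields["%A"])     # one entry per author
    print(fields["%P"][0])  # page range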
Markdown (Informal)
[PeruSIL: A Framework to Build a Continuous Peruvian Sign Language Interpretation Dataset](https://aclanthology.org/2022.signlang-1.1) (Bejarano et al., SignLang 2022)
ACL
Gissella Bejarano, Joe Huamani-Malca, Francisco Cerna-Herrera, Fernando Alva-Manchego, and Pablo Rivas. 2022. PeruSIL: A Framework to Build a Continuous Peruvian Sign Language Interpretation Dataset. In Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources, pages 1–8, Marseille, France. European Language Resources Association.