@inproceedings{jang-etal-2022-automatic,
title = "Automatic Gloss-level Data Augmentation for Sign Language Translation",
author = "Jang, Jin Yea and
Park, Han-Mu and
Shin, Saim and
Shin, Suna and
Yoon, Byungcheon and
Gweon, Gahgene",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.734",
pages = "6808--6813",
abstract = "Securing sufficient data to enable automatic sign language translation modeling is challenging. The data insufficiency issue exists in both video and text modalities; however, fewer studies have been performed on text data augmentation compared to video data. In this study, we present three methods of augmenting sign language text modality data, comprising 3,052 Gloss-level Korean Sign Language (GKSL) and Word-level Korean Language (WKL) sentence pairs. Using each of the three methods, the following number of sentence pairs were created: blank replacement 10,654, sentence paraphrasing 1,494, and synonym replacement 899. Translation experiment results using the augmented data showed that when translating from GKSL to WKL and from WKL to GKSL, Bi-Lingual Evaluation Understudy (BLEU) scores improved by 0.204 and 0.170 respectively, compared to when only the original data was used. The three contributions of this study are as follows. First, we demonstrated that three different augmentation techniques used in existing Natural Language Processing (NLP) can be applied to sign language. Second, we propose an automatic data augmentation method which generates quality data by utilizing the Korean sign language gloss dictionary. Lastly, we publish the Gloss-level Korean Sign Language 13k dataset (GKSL13k), which has verified data quality through expert reviews.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jang-etal-2022-automatic">
<titleInfo>
<title>Automatic Gloss-level Data Augmentation for Sign Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="given">Yea</namePart>
<namePart type="family">Jang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Han-Mu</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saim</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suna</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byungcheon</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gahgene</namePart>
<namePart type="family">Gweon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Securing sufficient data to enable automatic sign language translation modeling is challenging. The data insufficiency issue exists in both video and text modalities; however, fewer studies have been performed on text data augmentation compared to video data. In this study, we present three methods of augmenting sign language text modality data, comprising 3,052 Gloss-level Korean Sign Language (GKSL) and Word-level Korean Language (WKL) sentence pairs. Using each of the three methods, the following number of sentence pairs were created: blank replacement 10,654, sentence paraphrasing 1,494, and synonym replacement 899. Translation experiment results using the augmented data showed that when translating from GKSL to WKL and from WKL to GKSL, Bi-Lingual Evaluation Understudy (BLEU) scores improved by 0.204 and 0.170 respectively, compared to when only the original data was used. The three contributions of this study are as follows. First, we demonstrated that three different augmentation techniques used in existing Natural Language Processing (NLP) can be applied to sign language. Second, we propose an automatic data augmentation method which generates quality data by utilizing the Korean sign language gloss dictionary. Lastly, we publish the Gloss-level Korean Sign Language 13k dataset (GKSL13k), which has verified data quality through expert reviews.</abstract>
<identifier type="citekey">jang-etal-2022-automatic</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.734</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>6808</start>
<end>6813</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Gloss-level Data Augmentation for Sign Language Translation
%A Jang, Jin Yea
%A Park, Han-Mu
%A Shin, Saim
%A Shin, Suna
%A Yoon, Byungcheon
%A Gweon, Gahgene
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F jang-etal-2022-automatic
%X Securing sufficient data to enable automatic sign language translation modeling is challenging. The data insufficiency issue exists in both video and text modalities; however, fewer studies have been performed on text data augmentation compared to video data. In this study, we present three methods of augmenting sign language text modality data, comprising 3,052 Gloss-level Korean Sign Language (GKSL) and Word-level Korean Language (WKL) sentence pairs. Using each of the three methods, the following number of sentence pairs were created: blank replacement 10,654, sentence paraphrasing 1,494, and synonym replacement 899. Translation experiment results using the augmented data showed that when translating from GKSL to WKL and from WKL to GKSL, Bi-Lingual Evaluation Understudy (BLEU) scores improved by 0.204 and 0.170 respectively, compared to when only the original data was used. The three contributions of this study are as follows. First, we demonstrated that three different augmentation techniques used in existing Natural Language Processing (NLP) can be applied to sign language. Second, we propose an automatic data augmentation method which generates quality data by utilizing the Korean sign language gloss dictionary. Lastly, we publish the Gloss-level Korean Sign Language 13k dataset (GKSL13k), which has verified data quality through expert reviews.
%U https://aclanthology.org/2022.lrec-1.734
%P 6808-6813
Markdown (Informal)
[Automatic Gloss-level Data Augmentation for Sign Language Translation](https://aclanthology.org/2022.lrec-1.734) (Jang et al., LREC 2022)
ACL