@inproceedings{kim-etal-2024-korean,
title = "{K}orean Disaster Safety Information Sign Language Translation Benchmark Dataset",
author = "Kim, Wooyoung and
Kim, TaeYong and
Kim, Byeongjin and
Lee, Myeong Jin MJ and
Lee, Gitaek and
Kim, Kirok and
Cha, Jisoo and
Kim, Wooju",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.869",
pages = "9948--9953",
abstract = "Sign language is a crucial means of communication for deaf communities. However, those outside deaf communities often lack understanding of sign language, leading to inadequate communication accessibility for the deaf. Therefore, sign language translation is a significantly important research area. In this context, we present a new benchmark dataset for Korean sign language translation named SSL:korean disaster Safety information Sign Language translation benchmark dataset. Korean sign language translation datasets provided by the National Information Society Agency in South Korea have faced challenges related to computational resources, heterogeneity between train and test sets, and unrefined data. To alleviate the aforementioned issue, we refine the origin data and release them. Additionally, we report experimental results of baseline using a transformer architecture. We empirically demonstrate that the baseline performance varies depending on the tokenization method applied to gloss sequences. In particular, tokenization based on characteristics of sign language outperforms tokenization considering characteristics of spoken language and tokenization utilizing statistical techniques. We release materials at our https://github.com/SSL-Sign-Language/Korean-Disaster-Safety-Information-Sign-Language-Translation-Benchmark-Dataset",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2024-korean">
<titleInfo>
<title>Korean Disaster Safety Information Sign Language Translation Benchmark Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wooyoung</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">TaeYong</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Byeongjin</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Myeong</namePart>
<namePart type="given">Jin</namePart>
<namePart type="given">MJ</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gitaek</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirok</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jisoo</namePart>
<namePart type="family">Cha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wooju</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sign language is a crucial means of communication for deaf communities. However, those outside deaf communities often lack understanding of sign language, leading to inadequate communication accessibility for the deaf. Therefore, sign language translation is a significantly important research area. In this context, we present a new benchmark dataset for Korean sign language translation named SSL:korean disaster Safety information Sign Language translation benchmark dataset. Korean sign language translation datasets provided by the National Information Society Agency in South Korea have faced challenges related to computational resources, heterogeneity between train and test sets, and unrefined data. To alleviate the aforementioned issue, we refine the origin data and release them. Additionally, we report experimental results of baseline using a transformer architecture. We empirically demonstrate that the baseline performance varies depending on the tokenization method applied to gloss sequences. In particular, tokenization based on characteristics of sign language outperforms tokenization considering characteristics of spoken language and tokenization utilizing statistical techniques. We release materials at our https://github.com/SSL-Sign-Language/Korean-Disaster-Safety-Information-Sign-Language-Translation-Benchmark-Dataset</abstract>
<identifier type="citekey">kim-etal-2024-korean</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.869</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9948</start>
<end>9953</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Korean Disaster Safety Information Sign Language Translation Benchmark Dataset
%A Kim, Wooyoung
%A Kim, TaeYong
%A Kim, Byeongjin
%A Lee, Myeong Jin MJ
%A Lee, Gitaek
%A Kim, Kirok
%A Cha, Jisoo
%A Kim, Wooju
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kim-etal-2024-korean
%X Sign language is a crucial means of communication for deaf communities. However, those outside deaf communities often lack understanding of sign language, leading to inadequate communication accessibility for the deaf. Therefore, sign language translation is a significantly important research area. In this context, we present a new benchmark dataset for Korean sign language translation named SSL:korean disaster Safety information Sign Language translation benchmark dataset. Korean sign language translation datasets provided by the National Information Society Agency in South Korea have faced challenges related to computational resources, heterogeneity between train and test sets, and unrefined data. To alleviate the aforementioned issue, we refine the origin data and release them. Additionally, we report experimental results of baseline using a transformer architecture. We empirically demonstrate that the baseline performance varies depending on the tokenization method applied to gloss sequences. In particular, tokenization based on characteristics of sign language outperforms tokenization considering characteristics of spoken language and tokenization utilizing statistical techniques. We release materials at our https://github.com/SSL-Sign-Language/Korean-Disaster-Safety-Information-Sign-Language-Translation-Benchmark-Dataset
%U https://aclanthology.org/2024.lrec-main.869
%P 9948-9953
Markdown (Informal)
[Korean Disaster Safety Information Sign Language Translation Benchmark Dataset](https://aclanthology.org/2024.lrec-main.869) (Kim et al., LREC-COLING 2024)
ACL
- Wooyoung Kim, TaeYong Kim, Byeongjin Kim, Myeong Jin MJ Lee, Gitaek Lee, Kirok Kim, Jisoo Cha, and Wooju Kim. 2024. Korean Disaster Safety Information Sign Language Translation Benchmark Dataset. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 9948–9953, Torino, Italia. ELRA and ICCL.