@inproceedings{honghaofu-etal-2024-signer,
title = "Signer Diversity-driven Data Augmentation for Signer-Independent Sign Language Translation",
author = "Fu, Honghao and
Zhang, Liang and
Fu, Biao and
Zhao, Rui and
Su, Jinsong and
Shi, Xiaodong and
Chen, Yidong",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.140",
doi = "10.18653/v1/2024.findings-naacl.140",
pages = "2182--2193",
abstract = "The primary objective of sign language translation (SLT) is to transform sign language videos into natural sentences.A crucial challenge in this field is developing signer-independent SLT systems which requires models to generalize effectively to signers not encountered during training.This challenge is exacerbated by the limited diversity of signers in existing SLT datasets, which often results in suboptimal generalization capabilities of current models.Achieving robustness to unseen signers is essential for signer-independent SLT.However, most existing method relies on signer identity labels, which is often impractical and costly in real-world applications.To address this issue, we propose the Signer Diversity-driven Data Augmentation (SDDA) method that can achieve good generalization without relying on signer identity labels. SDDA comprises two data augmentation schemes. The first is data augmentation based on adversarial training, which aims to utilize the gradients of the model to generate adversarial examples. The second is data augmentation based on diffusion model, which focuses on using the advanced diffusion-based text guided image editing method to modify the appearances of the signer in images. The combination of the two strategies significantly enriches the diversity of signers in the training process.Moreover, we introduce a consistency loss and a discrimination loss to enhance the learning of signer-independent features.Our experimental results demonstrate our model significantly enhances the performance of SLT in the signer-independent setting, achieving state-of-the-art results without relying on signer identity labels.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="honghaofu-etal-2024-signer">
<titleInfo>
<title>Signer Diversity-driven Data Augmentation for Signer-Independent Sign Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Honghao</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Biao</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinsong</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaodong</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yidong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The primary objective of sign language translation (SLT) is to transform sign language videos into natural sentences.A crucial challenge in this field is developing signer-independent SLT systems which requires models to generalize effectively to signers not encountered during training.This challenge is exacerbated by the limited diversity of signers in existing SLT datasets, which often results in suboptimal generalization capabilities of current models.Achieving robustness to unseen signers is essential for signer-independent SLT.However, most existing method relies on signer identity labels, which is often impractical and costly in real-world applications.To address this issue, we propose the Signer Diversity-driven Data Augmentation (SDDA) method that can achieve good generalization without relying on signer identity labels. SDDA comprises two data augmentation schemes. The first is data augmentation based on adversarial training, which aims to utilize the gradients of the model to generate adversarial examples. The second is data augmentation based on diffusion model, which focuses on using the advanced diffusion-based text guided image editing method to modify the appearances of the signer in images. The combination of the two strategies significantly enriches the diversity of signers in the training process.Moreover, we introduce a consistency loss and a discrimination loss to enhance the learning of signer-independent features.Our experimental results demonstrate our model significantly enhances the performance of SLT in the signer-independent setting, achieving state-of-the-art results without relying on signer identity labels.</abstract>
<identifier type="citekey">honghaofu-etal-2024-signer</identifier>
<identifier type="doi">10.18653/v1/2024.findings-naacl.140</identifier>
<location>
<url>https://aclanthology.org/2024.findings-naacl.140</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>2182</start>
<end>2193</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Signer Diversity-driven Data Augmentation for Signer-Independent Sign Language Translation
%A Fu, Honghao
%A Zhang, Liang
%A Fu, Biao
%A Zhao, Rui
%A Su, Jinsong
%A Shi, Xiaodong
%A Chen, Yidong
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F honghaofu-etal-2024-signer
%X The primary objective of sign language translation (SLT) is to transform sign language videos into natural sentences.A crucial challenge in this field is developing signer-independent SLT systems which requires models to generalize effectively to signers not encountered during training.This challenge is exacerbated by the limited diversity of signers in existing SLT datasets, which often results in suboptimal generalization capabilities of current models.Achieving robustness to unseen signers is essential for signer-independent SLT.However, most existing method relies on signer identity labels, which is often impractical and costly in real-world applications.To address this issue, we propose the Signer Diversity-driven Data Augmentation (SDDA) method that can achieve good generalization without relying on signer identity labels. SDDA comprises two data augmentation schemes. The first is data augmentation based on adversarial training, which aims to utilize the gradients of the model to generate adversarial examples. The second is data augmentation based on diffusion model, which focuses on using the advanced diffusion-based text guided image editing method to modify the appearances of the signer in images. The combination of the two strategies significantly enriches the diversity of signers in the training process.Moreover, we introduce a consistency loss and a discrimination loss to enhance the learning of signer-independent features.Our experimental results demonstrate our model significantly enhances the performance of SLT in the signer-independent setting, achieving state-of-the-art results without relying on signer identity labels.
%R 10.18653/v1/2024.findings-naacl.140
%U https://aclanthology.org/2024.findings-naacl.140
%U https://doi.org/10.18653/v1/2024.findings-naacl.140
%P 2182-2193
Markdown (Informal)
[Signer Diversity-driven Data Augmentation for Signer-Independent Sign Language Translation](https://aclanthology.org/2024.findings-naacl.140) (Fu et al., Findings 2024)
ACL