@inproceedings{dong-etal-2024-word,
title = "Word-Conditioned 3{D} {A}merican {S}ign {L}anguage Motion Generation",
author = "Dong, Lu and
Wang, Xiao and
Nwogu, Ifeoma",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.584/",
doi = "10.18653/v1/2024.findings-emnlp.584",
pages = "9993--9999",
abstract = "Sign words are the building blocks of any sign language. In this work, we present wSignGen, a word-conditioned 3D American Sign Language (ASL) generation model dedicated to synthesizing realistic and grammatically accurate motion sequences for sign words. Our approach leverages a transformer-based diffusion model, trained on a curated dataset of 3D motion meshes from word-level ASL videos. By integrating CLIP, wSignGen offers two advantages: image-based generation, which is particularly useful for children learning sign language but not yet able to read, and the ability to generalize to unseen synonyms. Experiments demonstrate that wSignGen significantly outperforms the baseline model in the task of sign word generation. Moreover, human evaluation experiments show that wSignGen can generate high-quality, grammatically correct ASL signs effectively conveyed through 3D avatars."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dong-etal-2024-word">
<titleInfo>
<title>Word-Conditioned 3D American Sign Language Motion Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ifeoma</namePart>
<namePart type="family">Nwogu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sign words are the building blocks of any sign language. In this work, we present wSignGen, a word-conditioned 3D American Sign Language (ASL) generation model dedicated to synthesizing realistic and grammatically accurate motion sequences for sign words. Our approach leverages a transformer-based diffusion model, trained on a curated dataset of 3D motion meshes from word-level ASL videos. By integrating CLIP, wSignGen offers two advantages: image-based generation, which is particularly useful for children learning sign language but not yet able to read, and the ability to generalize to unseen synonyms. Experiments demonstrate that wSignGen significantly outperforms the baseline model in the task of sign word generation. Moreover, human evaluation experiments show that wSignGen can generate high-quality, grammatically correct ASL signs effectively conveyed through 3D avatars.</abstract>
<identifier type="citekey">dong-etal-2024-word</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.584</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.584/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>9993</start>
<end>9999</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word-Conditioned 3D American Sign Language Motion Generation
%A Dong, Lu
%A Wang, Xiao
%A Nwogu, Ifeoma
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F dong-etal-2024-word
%X Sign words are the building blocks of any sign language. In this work, we present wSignGen, a word-conditioned 3D American Sign Language (ASL) generation model dedicated to synthesizing realistic and grammatically accurate motion sequences for sign words. Our approach leverages a transformer-based diffusion model, trained on a curated dataset of 3D motion meshes from word-level ASL videos. By integrating CLIP, wSignGen offers two advantages: image-based generation, which is particularly useful for children learning sign language but not yet able to read, and the ability to generalize to unseen synonyms. Experiments demonstrate that wSignGen significantly outperforms the baseline model in the task of sign word generation. Moreover, human evaluation experiments show that wSignGen can generate high-quality, grammatically correct ASL signs effectively conveyed through 3D avatars.
%R 10.18653/v1/2024.findings-emnlp.584
%U https://aclanthology.org/2024.findings-emnlp.584/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.584
%P 9993-9999
Markdown (Informal)
[Word-Conditioned 3D American Sign Language Motion Generation](https://aclanthology.org/2024.findings-emnlp.584/) (Dong et al., Findings 2024)
ACL