@inproceedings{bianco-etal-2025-multilingual,
  title     = {Multilingual Sign Language Translation with Unified Datasets and Pose-Based Transformers},
  author    = {Bianco, Pedro Alejandro Dal and
               Stanchi, Oscar Agust{\'i}n and
               Quiroga, Facundo Manuel and
               Ronchetti, Franco},
  editor    = {Hasanuzzaman, Mohammed and
               Quiroga, Facundo Manuel and
               Modi, Ashutosh and
               Kamila, Sabyasachi and
               Artiaga, Keren and
               Joshi, Abhinav and
               Singh, Sanjeet},
  booktitle = {Proceedings of the Workshop on Sign Language Processing (WSLP)},
  month     = dec,
  year      = {2025},
  address   = {IIT Bombay, Mumbai, India (Co-located with IJCNLP{--}AACL 2025)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.wslp-main.5/},
  pages     = {27--32},
  isbn      = {979-8-89176-304-3},
  abstract  = {Sign languages are highly diverse across countries and regions, yet most Sign Language Translation (SLT) work remains monolingual. We explore a unified, multi-target SLT model trained jointly on four sign languages (German, Greek, Argentinian, Indian) using a standardized data layer. Our model operates on pose keypoints extracted with MediaPipe, yielding a lightweight and dataset-agnostic representation that is less sensitive to backgrounds, clothing, cameras, or signer identity while retaining motion and configuration cues. On RWTH-PHOENIX-Weather 2014T, Greek Sign Language Dataset, LSA-T, and ISLTranslate, naive joint training under a fully shared parameterization performs worse than monolingual baselines; however, a simple two-stage schedule: multilingual pre-training followed by a short language-specific fine-tuning, recovers and surpasses monolingual results on three datasets (PHOENIX14T: $+0.15$ BLEU-4; GSL: $+0.74$; ISL: $+0.10$) while narrowing the gap on the most challenging corpus (LSA-T: $-0.24$ vs. monolingual). Scores span from BLEU-4$\approx 1$ on open-domain news (LSA-T) to $>90$ on constrained curricula (GSL), highlighting the role of dataset complexity. We release our code to facilitate training and evaluation of multilingual SLT models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bianco-etal-2025-multilingual">
<titleInfo>
<title>Multilingual Sign Language Translation with Unified Datasets and Pose-Based Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="given">Alejandro</namePart>
<namePart type="given">Dal</namePart>
<namePart type="family">Bianco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oscar</namePart>
<namePart type="given">Agustín</namePart>
<namePart type="family">Stanchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Facundo</namePart>
<namePart type="given">Manuel</namePart>
<namePart type="family">Quiroga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franco</namePart>
<namePart type="family">Ronchetti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Sign Language Processing (WSLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Hasanuzzaman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Facundo</namePart>
<namePart type="given">Manuel</namePart>
<namePart type="family">Quiroga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashutosh</namePart>
<namePart type="family">Modi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabyasachi</namePart>
<namePart type="family">Kamila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keren</namePart>
<namePart type="family">Artiaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhinav</namePart>
<namePart type="family">Joshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanjeet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">IIT Bombay, Mumbai, India (Co-located with IJCNLP–AACL 2025)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-304-3</identifier>
</relatedItem>
<abstract>Sign languages are highly diverse across countries and regions, yet most Sign Language Translation (SLT) work remains monolingual. We explore a unified, multi-target SLT model trained jointly on four sign languages (German, Greek, Argentinian, Indian) using a standardized data layer. Our model operates on pose keypoints extracted with MediaPipe, yielding a lightweight and dataset-agnostic representation that is less sensitive to backgrounds, clothing, cameras, or signer identity while retaining motion and configuration cues. On RWTH-PHOENIX-Weather 2014T, Greek Sign Language Dataset, LSA-T, and ISLTranslate, naive joint training under a fully shared parameterization performs worse than monolingual baselines; however, a simple two-stage schedule: multilingual pre-training followed by a short language-specific fine-tuning, recovers and surpasses monolingual results on three datasets (PHOENIX14T: +0.15 BLEU-4; GSL: +0.74; ISL: +0.10) while narrowing the gap on the most challenging corpus (LSA-T: -0.24 vs. monolingual). Scores span from BLEU-4 ≈ 1 on open-domain news (LSA-T) to >90 on constrained curricula (GSL), highlighting the role of dataset complexity. We release our code to facilitate training and evaluation of multilingual SLT models.</abstract>
<identifier type="citekey">bianco-etal-2025-multilingual</identifier>
<location>
<url>https://aclanthology.org/2025.wslp-main.5/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>27</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Sign Language Translation with Unified Datasets and Pose-Based Transformers
%A Bianco, Pedro Alejandro Dal
%A Stanchi, Oscar Agustín
%A Quiroga, Facundo Manuel
%A Ronchetti, Franco
%Y Hasanuzzaman, Mohammed
%Y Quiroga, Facundo Manuel
%Y Modi, Ashutosh
%Y Kamila, Sabyasachi
%Y Artiaga, Keren
%Y Joshi, Abhinav
%Y Singh, Sanjeet
%S Proceedings of the Workshop on Sign Language Processing (WSLP)
%D 2025
%8 December
%I Association for Computational Linguistics
%C IIT Bombay, Mumbai, India (Co-located with IJCNLP–AACL 2025)
%@ 979-8-89176-304-3
%F bianco-etal-2025-multilingual
%X Sign languages are highly diverse across countries and regions, yet most Sign Language Translation (SLT) work remains monolingual. We explore a unified, multi-target SLT model trained jointly on four sign languages (German, Greek, Argentinian, Indian) using a standardized data layer. Our model operates on pose keypoints extracted with MediaPipe, yielding a lightweight and dataset-agnostic representation that is less sensitive to backgrounds, clothing, cameras, or signer identity while retaining motion and configuration cues. On RWTH-PHOENIX-Weather 2014T, Greek Sign Language Dataset, LSA-T, and ISLTranslate, naive joint training under a fully shared parameterization performs worse than monolingual baselines; however, a simple two-stage schedule: multilingual pre-training followed by a short language-specific fine-tuning, recovers and surpasses monolingual results on three datasets (PHOENIX14T: +0.15 BLEU-4; GSL: +0.74; ISL: +0.10) while narrowing the gap on the most challenging corpus (LSA-T: -0.24 vs. monolingual). Scores span from BLEU-4 ≈ 1 on open-domain news (LSA-T) to >90 on constrained curricula (GSL), highlighting the role of dataset complexity. We release our code to facilitate training and evaluation of multilingual SLT models.
%U https://aclanthology.org/2025.wslp-main.5/
%P 27-32
Markdown (Informal)
[Multilingual Sign Language Translation with Unified Datasets and Pose-Based Transformers](https://aclanthology.org/2025.wslp-main.5/) (Bianco et al., WSLP 2025)
ACL