% BibTeX record for the PROPOR 2026 paper on the Libras-UFPel Corpus (ACL Anthology: 2026.propor-1.112).
@inproceedings{martins-etal-2026-libras,
  title     = {{Libras-UFPel} Corpus: A Parallel Dataset of {Brazilian Sign Language} and {Portuguese} for Multimodal Research and Processing},
  author    = {Martins, Antonielle and
               Santana, Brenda S. and
               Martins, Francielle and
               Lebedeff, Tatiana and
               Nunes, Darley and
               Bohm, Luisa},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.112/},
  pages     = {1068--1073},
  isbn      = {979-8-89176-387-6},
  abstract  = {The Libras-UFPel Corpus is a multimodal, multilayer parallel resource designed for the documentation and computational analysis of Brazilian Sign Language (Libras) in systematic alignment with written Portuguese. By integrating controlled recordings with naturalistic data from the Invent{\'a}rio Nacional de Libras-Pelotas, the corpus ensures interoperability through shared methodological standards. The dataset currently comprises 4,800 controlled audiovisual records (2,400 sentences and 2,400 isolated signs) fully paired with Portuguese translations, supplemented by approximately 10 hours of spontaneous interaction from three new naturalistic interviews, currently in the editing phase. To date, 1,200 controlled sentences have been lemmatized, gloss-annotated and translated, providing a structured parallel subset for Libras-to-Portuguese Sign Language Processing tasks such as recognition and machine translation. The annotation model follows a hierarchical structure covering lexical, partially lexical, and non-lexical signs, including independent tiers for non-manual markers. By bridging descriptive linguistics and Natural Language Processing, Libras-UFPel Corpus serves as a reference source for bilingual data-driven modeling, advancing digital inclusion and linguistic accessibility.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="martins-etal-2026-libras">
<titleInfo>
<title>Libras-UFPel Corpus: A Parallel Dataset of Brazilian Sign Language and Portuguese for Multimodal Research and Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antonielle</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brenda</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Santana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francielle</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiana</namePart>
<namePart type="family">Lebedeff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Darley</namePart>
<namePart type="family">Nunes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bohm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>The Libras-UFPel Corpus is a multimodal, multilayer parallel resource designed for the documentation and computational analysis of Brazilian Sign Language (Libras) in systematic alignment with written Portuguese. By integrating controlled recordings with naturalistic data from the Inventário Nacional de Libras-Pelotas, the corpus ensures interoperability through shared methodological standards. The dataset currently comprises 4,800 controlled audiovisual records (2,400 sentences and 2,400 isolated signs) fully paired with Portuguese translations, supplemented by approximately 10 hours of spontaneous interaction from three new naturalistic interviews, currently in the editing phase. To date, 1,200 controlled sentences have been lemmatized, gloss-annotated and translated, providing a structured parallel subset for Libras-to-Portuguese Sign Language Processing tasks such as recognition and machine translation. The annotation model follows a hierarchical structure covering lexical, partially lexical, and non-lexical signs, including independent tiers for non-manual markers. By bridging descriptive linguistics and Natural Language Processing, Libras-UFPel Corpus serves as a reference source for bilingual data-driven modeling, advancing digital inclusion and linguistic accessibility.</abstract>
<identifier type="citekey">martins-etal-2026-libras</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.112/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>1068</start>
<end>1073</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Libras-UFPel Corpus: A Parallel Dataset of Brazilian Sign Language and Portuguese for Multimodal Research and Processing
%A Martins, Antonielle
%A Santana, Brenda S.
%A Martins, Francielle
%A Lebedeff, Tatiana
%A Nunes, Darley
%A Bohm, Luisa
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F martins-etal-2026-libras
%X The Libras-UFPel Corpus is a multimodal, multilayer parallel resource designed for the documentation and computational analysis of Brazilian Sign Language (Libras) in systematic alignment with written Portuguese. By integrating controlled recordings with naturalistic data from the Inventário Nacional de Libras-Pelotas, the corpus ensures interoperability through shared methodological standards. The dataset currently comprises 4,800 controlled audiovisual records (2,400 sentences and 2,400 isolated signs) fully paired with Portuguese translations, supplemented by approximately 10 hours of spontaneous interaction from three new naturalistic interviews, currently in the editing phase. To date, 1,200 controlled sentences have been lemmatized, gloss-annotated and translated, providing a structured parallel subset for Libras-to-Portuguese Sign Language Processing tasks such as recognition and machine translation. The annotation model follows a hierarchical structure covering lexical, partially lexical, and non-lexical signs, including independent tiers for non-manual markers. By bridging descriptive linguistics and Natural Language Processing, Libras-UFPel Corpus serves as a reference source for bilingual data-driven modeling, advancing digital inclusion and linguistic accessibility.
%U https://aclanthology.org/2026.propor-1.112/
%P 1068-1073
Markdown (Informal)
[Libras-UFPel Corpus: A Parallel Dataset of Brazilian Sign Language and Portuguese for Multimodal Research and Processing](https://aclanthology.org/2026.propor-1.112/) (Martins et al., PROPOR 2026)
ACL