@inproceedings{quinta-de-castro-da-silva-2025-labor,
title = "Labor Lex: A New {P}ortuguese Corpus and Pipeline for Information Extraction in {B}razilian Legal Texts",
author = "Quinta de Castro, Pedro Vitor and
Da Silva, N{\'a}dia F{\'e}lix Felipe",
editor = "Aletras, Nikolaos and
Chalkidis, Ilias and
Barrett, Leslie and
Goanț{\u{a}}, C{\u{a}}t{\u{a}}lina and
Preoțiuc-Pietro, Daniel and
Spanakis, Gerasimos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nllp-1.14/",
pages = "184--206",
ISBN = "979-8-89176-338-8",
abstract = "Relation Extraction (RE) is a challenging Natural Language Processing task that involves identifying named entities from text and classifying the relationships between them. When applied to a specific domain, the task acquires a new layer of complexity, handling the lexicon and context particular to the domain in question. In this work, this task is applied to the Legal domain, specifically targeting Brazilian Labor Law. Architectures based on Deep Learning, with word representations derived from Transformer Language Models (LM), have shown state-of-the-art performance for the RE task. Recent works on this task handle Named Entity Recognition (NER) and RE either as a single joint model or as a pipelined approach. In this work, we introduce Labor Lex, a newly constructed corpus based on public documents from Brazilian Labor Courts. We also present a pipeline of models trained on it. Different experiments are conducted for each task, comparing supervised training using LMs and In-Context Learning (ICL) with Large Language Models (LLM), and verifying and analyzing the results for each one. For the NER task, the best achieved result was 89.97{\%} F1-Score, and for the RE task, the best result was 82.38{\%} F1-Score. The best results for both tasks were obtained using the supervised training approach."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="quinta-de-castro-da-silva-2025-labor">
<titleInfo>
<title>Labor Lex: A New Portuguese Corpus and Pipeline for Information Extraction in Brazilian Legal Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="given">Vitor</namePart>
<namePart type="family">Quinta de Castro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nádia</namePart>
<namePart type="given">Félix</namePart>
<namePart type="given">Felipe</namePart>
<namePart type="family">Da Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cătălina</namePart>
<namePart type="family">Goanță</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoțiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Spanakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-338-8</identifier>
</relatedItem>
<abstract>Relation Extraction (RE) is a challenging Natural Language Processing task that involves identifying named entities from text and classifying the relationships between them. When applied to a specific domain, the task acquires a new layer of complexity, handling the lexicon and context particular to the domain in question. In this work, this task is applied to the Legal domain, specifically targeting Brazilian Labor Law. Architectures based on Deep Learning, with word representations derived from Transformer Language Models (LM), have shown state-of-the-art performance for the RE task. Recent works on this task handle Named Entity Recognition (NER) and RE either as a single joint model or as a pipelined approach. In this work, we introduce Labor Lex, a newly constructed corpus based on public documents from Brazilian Labor Courts. We also present a pipeline of models trained on it. Different experiments are conducted for each task, comparing supervised training using LMs and In-Context Learning (ICL) with Large Language Models (LLM), and verifying and analyzing the results for each one. For the NER task, the best achieved result was 89.97% F1-Score, and for the RE task, the best result was 82.38% F1-Score. The best results for both tasks were obtained using the supervised training approach.</abstract>
<identifier type="citekey">quinta-de-castro-da-silva-2025-labor</identifier>
<location>
<url>https://aclanthology.org/2025.nllp-1.14/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>184</start>
<end>206</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Labor Lex: A New Portuguese Corpus and Pipeline for Information Extraction in Brazilian Legal Texts
%A Quinta de Castro, Pedro Vitor
%A Da Silva, Nádia Félix Felipe
%Y Aletras, Nikolaos
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Goanță, Cătălina
%Y Preoțiuc-Pietro, Daniel
%Y Spanakis, Gerasimos
%S Proceedings of the Natural Legal Language Processing Workshop 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-338-8
%F quinta-de-castro-da-silva-2025-labor
%X Relation Extraction (RE) is a challenging Natural Language Processing task that involves identifying named entities from text and classifying the relationships between them. When applied to a specific domain, the task acquires a new layer of complexity, handling the lexicon and context particular to the domain in question. In this work, this task is applied to the Legal domain, specifically targeting Brazilian Labor Law. Architectures based on Deep Learning, with word representations derived from Transformer Language Models (LM), have shown state-of-the-art performance for the RE task. Recent works on this task handle Named Entity Recognition (NER) and RE either as a single joint model or as a pipelined approach. In this work, we introduce Labor Lex, a newly constructed corpus based on public documents from Brazilian Labor Courts. We also present a pipeline of models trained on it. Different experiments are conducted for each task, comparing supervised training using LMs and In-Context Learning (ICL) with Large Language Models (LLM), and verifying and analyzing the results for each one. For the NER task, the best achieved result was 89.97% F1-Score, and for the RE task, the best result was 82.38% F1-Score. The best results for both tasks were obtained using the supervised training approach.
%U https://aclanthology.org/2025.nllp-1.14/
%P 184-206
Markdown (Informal)
[Labor Lex: A New Portuguese Corpus and Pipeline for Information Extraction in Brazilian Legal Texts](https://aclanthology.org/2025.nllp-1.14/) (Quinta de Castro & Da Silva, NLLP 2025)
ACL