% Conference-paper record; the url field points to the paper's ACL Anthology page.
% Case-sensitive words in title/booktitle are protected with whole-word braces.
@inproceedings{dornelles-2026-democratizing,
  title     = {Democratizing Legal Analytics: Resource-Efficient Information Extraction for {Brazilian} Case Law},
  author    = {Dornelles, Rodrigo Filippi},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.103/},
  pages     = {1011--1020},
  isbn      = {979-8-89176-387-6},
  abstract  = {Legal systems produce large volumes of high-stakes decisions in unstructured natural language, making large-scale empirical analysis costly, difficult to reproduce, and unevenly accessible. This bottleneck is especially acute for legal analytics and policy evaluation in low-resource languages such as Portuguese. To address it, we present a resource-efficient pipeline for information extraction from Brazilian criminal case law that reuses a legacy dataset to fine-tune open-weight LLMs with Q-LoRA. Operating in a small-data setting and using schema-constrained JSON generation, the pipeline extracts 47 legal variables spanning charges, evidence, and sentencing outcome. In held-out evaluation, a fine-tuned Phi-4 (14B) model achieves 92.8{\%} accuracy and 0.826 macro-F1, approaching proprietary baselines while retaining the cost and privacy benefits of local deployment. We then use the extracted data in a case study of the short-term effects of a recent Brazilian Supreme Court ruling on drug decriminalization, finding no statistically significant change in trafficking-conviction rates (p{\ensuremath{\geq}}0.05), a pattern consistent with short-run institutional inertia. More broadly, the paper contributes a reproducible framework for legal NLP and shows how legacy empirical datasets can support scalable legal analytics under severe resource constraints.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dornelles-2026-democratizing">
<titleInfo>
<title>Democratizing Legal Analytics: Resource-Efficient Information Extraction for Brazilian Case Law</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rodrigo</namePart>
<namePart type="given">Filippi</namePart>
<namePart type="family">Dornelles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>Legal systems produce large volumes of high-stakes decisions in unstructured natural language, making large-scale empirical analysis costly, difficult to reproduce, and unevenly accessible. This bottleneck is especially acute for legal analytics and policy evaluation in low-resource languages such as Portuguese. To address it, we present a resource-efficient pipeline for information extraction from Brazilian criminal case law that reuses a legacy dataset to fine-tune open-weight LLMs with Q-LoRA. Operating in a small-data setting and using schema-constrained JSON generation, the pipeline extracts 47 legal variables spanning charges, evidence, and sentencing outcome. In held-out evaluation, a fine-tuned Phi-4 (14B) model achieves 92.8% accuracy and 0.826 macro-F1, approaching proprietary baselines while retaining the cost and privacy benefits of local deployment. We then use the extracted data in a case study of the short-term effects of a recent Brazilian Supreme Court ruling on drug decriminalization, finding no statistically significant change in trafficking-conviction rates (p≥0.05), a pattern consistent with short-run institutional inertia. More broadly, the paper contributes a reproducible framework for legal NLP and shows how legacy empirical datasets can support scalable legal analytics under severe resource constraints.</abstract>
<identifier type="citekey">dornelles-2026-democratizing</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.103/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>1011</start>
<end>1020</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Democratizing Legal Analytics: Resource-Efficient Information Extraction for Brazilian Case Law
%A Dornelles, Rodrigo Filippi
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F dornelles-2026-democratizing
%X Legal systems produce large volumes of high-stakes decisions in unstructured natural language, making large-scale empirical analysis costly, difficult to reproduce, and unevenly accessible. This bottleneck is especially acute for legal analytics and policy evaluation in low-resource languages such as Portuguese. To address it, we present a resource-efficient pipeline for information extraction from Brazilian criminal case law that reuses a legacy dataset to fine-tune open-weight LLMs with Q-LoRA. Operating in a small-data setting and using schema-constrained JSON generation, the pipeline extracts 47 legal variables spanning charges, evidence, and sentencing outcome. In held-out evaluation, a fine-tuned Phi-4 (14B) model achieves 92.8% accuracy and 0.826 macro-F1, approaching proprietary baselines while retaining the cost and privacy benefits of local deployment. We then use the extracted data in a case study of the short-term effects of a recent Brazilian Supreme Court ruling on drug decriminalization, finding no statistically significant change in trafficking-conviction rates (p≥0.05), a pattern consistent with short-run institutional inertia. More broadly, the paper contributes a reproducible framework for legal NLP and shows how legacy empirical datasets can support scalable legal analytics under severe resource constraints.
%U https://aclanthology.org/2026.propor-1.103/
%P 1011-1020
Markdown (Informal)
[Democratizing Legal Analytics: Resource-Efficient Information Extraction for Brazilian Case Law](https://aclanthology.org/2026.propor-1.103/) (Dornelles, PROPOR 2026)
ACL