@inproceedings{mali-etal-2024-information,
title = "Information Extraction for Planning Court Cases",
author = "Mali, Drish and
Mali, Rubash and
Barale, Claire",
editor = "Aletras, Nikolaos and
Chalkidis, Ilias and
Barrett, Leslie and
Goan{\textcommabelow{t}}{\u{a}}, C{\u{a}}t{\u{a}}lina and
Preo{\textcommabelow{t}}iuc-Pietro, Daniel and
Spanakis, Gerasimos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2024",
month = nov,
year = "2024",
address = "Miami, FL, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nllp-1.8",
pages = "97--114",
abstract = "Legal documents are often long and unstructured, making them challenging and time-consuming to apprehend. An automatic system that can identify relevant entities and labels within legal documents, would significantly reduce the legal research time. We developed a system to streamline legal case analysis from planning courts by extracting key information from XML files using Named Entity Recognition (NER) and multi-label classification models to convert them into structured form. This research contributes three novel datasets for the Planning Court cases: a NER dataset, a multi-label dataset fully annotated by humans, and newly re-annotated multi-label datasets partially annotated using LLMs. We experimented with various general-purpose and legal domain-specific models with different maximum sequence lengths. It was noted that incorporating paragraph position information improved the performance of models for the multi-label classification task. Our research highlighted the importance of domain-specific models, with LegalRoBERTa and LexLM demonstrating the best performance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mali-etal-2024-information">
<titleInfo>
<title>Information Extraction for Planning Court Cases</title>
</titleInfo>
<name type="personal">
<namePart type="given">Drish</namePart>
<namePart type="family">Mali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rubash</namePart>
<namePart type="family">Mali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Barale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cătălina</namePart>
<namePart type="family">Goanță</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoțiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Spanakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, FL, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Legal documents are often long and unstructured, making them challenging and time-consuming to apprehend. An automatic system that can identify relevant entities and labels within legal documents, would significantly reduce the legal research time. We developed a system to streamline legal case analysis from planning courts by extracting key information from XML files using Named Entity Recognition (NER) and multi-label classification models to convert them into structured form. This research contributes three novel datasets for the Planning Court cases: a NER dataset, a multi-label dataset fully annotated by humans, and newly re-annotated multi-label datasets partially annotated using LLMs. We experimented with various general-purpose and legal domain-specific models with different maximum sequence lengths. It was noted that incorporating paragraph position information improved the performance of models for the multi-label classification task. Our research highlighted the importance of domain-specific models, with LegalRoBERTa and LexLM demonstrating the best performance.</abstract>
<identifier type="citekey">mali-etal-2024-information</identifier>
<location>
<url>https://aclanthology.org/2024.nllp-1.8</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>97</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Information Extraction for Planning Court Cases
%A Mali, Drish
%A Mali, Rubash
%A Barale, Claire
%Y Aletras, Nikolaos
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Goanță, Cătălina
%Y Preoțiuc-Pietro, Daniel
%Y Spanakis, Gerasimos
%S Proceedings of the Natural Legal Language Processing Workshop 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, FL, USA
%F mali-etal-2024-information
%X Legal documents are often long and unstructured, making them challenging and time-consuming to apprehend. An automatic system that can identify relevant entities and labels within legal documents, would significantly reduce the legal research time. We developed a system to streamline legal case analysis from planning courts by extracting key information from XML files using Named Entity Recognition (NER) and multi-label classification models to convert them into structured form. This research contributes three novel datasets for the Planning Court cases: a NER dataset, a multi-label dataset fully annotated by humans, and newly re-annotated multi-label datasets partially annotated using LLMs. We experimented with various general-purpose and legal domain-specific models with different maximum sequence lengths. It was noted that incorporating paragraph position information improved the performance of models for the multi-label classification task. Our research highlighted the importance of domain-specific models, with LegalRoBERTa and LexLM demonstrating the best performance.
%U https://aclanthology.org/2024.nllp-1.8
%P 97-114
Markdown (Informal)
[Information Extraction for Planning Court Cases](https://aclanthology.org/2024.nllp-1.8) (Mali et al., NLLP 2024)
ACL
- Drish Mali, Rubash Mali, and Claire Barale. 2024. Information Extraction for Planning Court Cases. In Proceedings of the Natural Legal Language Processing Workshop 2024, pages 97–114, Miami, FL, USA. Association for Computational Linguistics.