@inproceedings{hwang-etal-2022-data,
title = "Data-efficient end-to-end Information Extraction for Statistical Legal Analysis",
author = "Hwang, Wonseok and
Eom, Saehee and
Lee, Hanuhl and
Park, Hai Jin and
Seo, Minjoon",
editor = "Aletras, Nikolaos and
Chalkidis, Ilias and
Barrett, Leslie and
Goan{\textcommabelow{t}}{\u{a}}, C{\u{a}}t{\u{a}}lina and
Preo{\textcommabelow{t}}iuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nllp-1.12",
doi = "10.18653/v1/2022.nllp-1.12",
pages = "143--152",
abstract = "Legal practitioners often face a vast amount of documents. Lawyers, for instance, search for appropriate precedents favorable to their clients, while the number of legal precedents is ever-growing. Although legal search engines can assist finding individual target documents and narrowing down the number of candidates, retrieved information is often presented as unstructured text and users have to examine each document thoroughly which could lead to information overloading. This also makes their statistical analysis challenging. Here, we present an end-to-end information extraction (IE) system for legal documents. By formulating IE as a generation task, our system can be easily applied to various tasks without domain-specific engineering effort. The experimental results of four IE tasks on Korean precedents shows that our IE system can achieve competent scores (-2.3 on average) compared to the rule-based baseline with as few as 50 training examples per task and higher score (+5.4 on average) with 200 examples. Finally, our statistical analysis on two case categories {---} drunk driving and fraud {---} with 35k precedents reveals the resulting structured information from our IE system faithfully reflects the macroscopic features of Korean legal system.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hwang-etal-2022-data">
<titleInfo>
<title>Data-efficient end-to-end Information Extraction for Statistical Legal Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wonseok</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saehee</namePart>
<namePart type="family">Eom</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanuhl</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hai</namePart>
<namePart type="given">Jin</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minjoon</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cătălina</namePart>
<namePart type="family">Goan\textcommabelowtă</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preo\textcommabelowtiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Legal practitioners often face a vast amount of documents. Lawyers, for instance, search for appropriate precedents favorable to their clients, while the number of legal precedents is ever-growing. Although legal search engines can assist finding individual target documents and narrowing down the number of candidates, retrieved information is often presented as unstructured text and users have to examine each document thoroughly which could lead to information overloading. This also makes their statistical analysis challenging. Here, we present an end-to-end information extraction (IE) system for legal documents. By formulating IE as a generation task, our system can be easily applied to various tasks without domain-specific engineering effort. The experimental results of four IE tasks on Korean precedents shows that our IE system can achieve competent scores (-2.3 on average) compared to the rule-based baseline with as few as 50 training examples per task and higher score (+5.4 on average) with 200 examples. Finally, our statistical analysis on two case categories — drunk driving and fraud — with 35k precedents reveals the resulting structured information from our IE system faithfully reflects the macroscopic features of Korean legal system.</abstract>
<identifier type="citekey">hwang-etal-2022-data</identifier>
<identifier type="doi">10.18653/v1/2022.nllp-1.12</identifier>
<location>
<url>https://aclanthology.org/2022.nllp-1.12</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>143</start>
<end>152</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-efficient end-to-end Information Extraction for Statistical Legal Analysis
%A Hwang, Wonseok
%A Eom, Saehee
%A Lee, Hanuhl
%A Park, Hai Jin
%A Seo, Minjoon
%Y Aletras, Nikolaos
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Goan\textcommabelowtă, Cătălina
%Y Preo\textcommabelowtiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F hwang-etal-2022-data
%X Legal practitioners often face a vast amount of documents. Lawyers, for instance, search for appropriate precedents favorable to their clients, while the number of legal precedents is ever-growing. Although legal search engines can assist finding individual target documents and narrowing down the number of candidates, retrieved information is often presented as unstructured text and users have to examine each document thoroughly which could lead to information overloading. This also makes their statistical analysis challenging. Here, we present an end-to-end information extraction (IE) system for legal documents. By formulating IE as a generation task, our system can be easily applied to various tasks without domain-specific engineering effort. The experimental results of four IE tasks on Korean precedents shows that our IE system can achieve competent scores (-2.3 on average) compared to the rule-based baseline with as few as 50 training examples per task and higher score (+5.4 on average) with 200 examples. Finally, our statistical analysis on two case categories — drunk driving and fraud — with 35k precedents reveals the resulting structured information from our IE system faithfully reflects the macroscopic features of Korean legal system.
%R 10.18653/v1/2022.nllp-1.12
%U https://aclanthology.org/2022.nllp-1.12
%U https://doi.org/10.18653/v1/2022.nllp-1.12
%P 143-152
Markdown (Informal)
[Data-efficient end-to-end Information Extraction for Statistical Legal Analysis](https://aclanthology.org/2022.nllp-1.12) (Hwang et al., NLLP 2022)
ACL