@inproceedings{barale-etal-2023-asylex,
title = "{A}sy{L}ex: A Dataset for Legal Language Processing of Refugee Claims",
author = "Barale, Claire and
Klaisoongnoen, Mark and
Minervini, Pasquale and
Rovatsos, Michael and
Bhuta, Nehal",
editor = "Preoțiuc-Pietro, Daniel and
Goanta, Catalina and
Chalkidis, Ilias and
Barrett, Leslie and
Spanakis, Gerasimos and
Aletras, Nikolaos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.nllp-1.24/",
doi = "10.18653/v1/2023.nllp-1.24",
pages = "244--257",
abstract = "Advancements in natural language processing (NLP) and language models have demonstrated immense potential in the legal domain, enabling automated analysis and comprehension of legal texts. However, developing robust models in Legal NLP is significantly challenged by the scarcity of resources. This paper presents AsyLex, the first dataset specifically designed for Refugee Law applications to address this gap. The dataset introduces 59,112 documents on refugee status determination in Canada from 1996 to 2022, providing researchers and practitioners with essential material for training and evaluating NLP models for legal research and case review. Case review is defined as entity extraction and outcome prediction tasks. The dataset includes 19,115 gold-standard human-labeled annotations for 20 legally relevant entity types curated with the help of legal experts and 1,682 gold-standard labeled documents for the case outcome. Furthermore, we supply the corresponding trained entity extraction models and the resulting labeled entities generated through the inference process on AsyLex. Four supplementary features are obtained through rule-based extraction. We demonstrate the usefulness of our dataset on the legal judgment prediction task to predict the binary outcome and test a set of baselines using the text of the documents and our annotations. We observe that models pretrained on similar legal documents reach better scores, suggesting that acquiring more datasets for specialized domains such as law is crucial."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barale-etal-2023-asylex">
<titleInfo>
<title>AsyLex: A Dataset for Legal Language Processing of Refugee Claims</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Barale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Klaisoongnoen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pasquale</namePart>
<namePart type="family">Minervini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Rovatsos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nehal</namePart>
<namePart type="family">Bhuta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoțiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Spanakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Advancements in natural language processing (NLP) and language models have demonstrated immense potential in the legal domain, enabling automated analysis and comprehension of legal texts. However, developing robust models in Legal NLP is significantly challenged by the scarcity of resources. This paper presents AsyLex, the first dataset specifically designed for Refugee Law applications to address this gap. The dataset introduces 59,112 documents on refugee status determination in Canada from 1996 to 2022, providing researchers and practitioners with essential material for training and evaluating NLP models for legal research and case review. Case review is defined as entity extraction and outcome prediction tasks. The dataset includes 19,115 gold-standard human-labeled annotations for 20 legally relevant entity types curated with the help of legal experts and 1,682 gold-standard labeled documents for the case outcome. Furthermore, we supply the corresponding trained entity extraction models and the resulting labeled entities generated through the inference process on AsyLex. Four supplementary features are obtained through rule-based extraction. We demonstrate the usefulness of our dataset on the legal judgment prediction task to predict the binary outcome and test a set of baselines using the text of the documents and our annotations. We observe that models pretrained on similar legal documents reach better scores, suggesting that acquiring more datasets for specialized domains such as law is crucial.</abstract>
<identifier type="citekey">barale-etal-2023-asylex</identifier>
<identifier type="doi">10.18653/v1/2023.nllp-1.24</identifier>
<location>
<url>https://aclanthology.org/2023.nllp-1.24/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>244</start>
<end>257</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AsyLex: A Dataset for Legal Language Processing of Refugee Claims
%A Barale, Claire
%A Klaisoongnoen, Mark
%A Minervini, Pasquale
%A Rovatsos, Michael
%A Bhuta, Nehal
%Y Preoțiuc-Pietro, Daniel
%Y Goanta, Catalina
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Spanakis, Gerasimos
%Y Aletras, Nikolaos
%S Proceedings of the Natural Legal Language Processing Workshop 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F barale-etal-2023-asylex
%X Advancements in natural language processing (NLP) and language models have demonstrated immense potential in the legal domain, enabling automated analysis and comprehension of legal texts. However, developing robust models in Legal NLP is significantly challenged by the scarcity of resources. This paper presents AsyLex, the first dataset specifically designed for Refugee Law applications to address this gap. The dataset introduces 59,112 documents on refugee status determination in Canada from 1996 to 2022, providing researchers and practitioners with essential material for training and evaluating NLP models for legal research and case review. Case review is defined as entity extraction and outcome prediction tasks. The dataset includes 19,115 gold-standard human-labeled annotations for 20 legally relevant entity types curated with the help of legal experts and 1,682 gold-standard labeled documents for the case outcome. Furthermore, we supply the corresponding trained entity extraction models and the resulting labeled entities generated through the inference process on AsyLex. Four supplementary features are obtained through rule-based extraction. We demonstrate the usefulness of our dataset on the legal judgment prediction task to predict the binary outcome and test a set of baselines using the text of the documents and our annotations. We observe that models pretrained on similar legal documents reach better scores, suggesting that acquiring more datasets for specialized domains such as law is crucial.
%R 10.18653/v1/2023.nllp-1.24
%U https://aclanthology.org/2023.nllp-1.24/
%U https://doi.org/10.18653/v1/2023.nllp-1.24
%P 244-257
Markdown (Informal)
[AsyLex: A Dataset for Legal Language Processing of Refugee Claims](https://aclanthology.org/2023.nllp-1.24/) (Barale et al., NLLP 2023)
ACL