@inproceedings{hong-etal-2021-learning,
title = "Learning from Limited Labels for Long Legal Dialogue",
author = "Hong, Jenny and
Chong, Derek and
Manning, Christopher",
editor = "Aletras, Nikolaos and
Androutsopoulos, Ion and
Barrett, Leslie and
Goanta, Catalina and
Preotiuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nllp-1.20",
doi = "10.18653/v1/2021.nllp-1.20",
pages = "190--204",
abstract = "We study attempting to achieve high accuracy information extraction of case factors from a challenging dataset of parole hearings, which, compared to other legal NLP datasets, has longer texts, with fewer labels. On this corpus, existing work directly applying pretrained neural models has failed to extract all but a few relatively basic items with little improvement over rule-based extraction. We address two challenges posed by existing work: training on long documents and reasoning over complex speech patterns. We use a similar approach to the two-step open-domain question answering approach by using a Reducer to extract relevant text segments and a Producer to generate both extractive answers and non-extractive classifications. In a context like ours, with limited labeled data, we show that a superior approach for strong performance within limited development time is to use a combination of a rule-based Reducer and a neural Producer. We study four representative tasks from the parole dataset. On all four, we improve extraction from the previous benchmark of 0.41{--}0.63 to 0.83{--}0.89 F1.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hong-etal-2021-learning">
<titleInfo>
<title>Learning from Limited Labels for Long Legal Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jenny</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="family">Chong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Manning</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ion</namePart>
<namePart type="family">Androutsopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preotiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We study attempting to achieve high accuracy information extraction of case factors from a challenging dataset of parole hearings, which, compared to other legal NLP datasets, has longer texts, with fewer labels. On this corpus, existing work directly applying pretrained neural models has failed to extract all but a few relatively basic items with little improvement over rule-based extraction. We address two challenges posed by existing work: training on long documents and reasoning over complex speech patterns. We use a similar approach to the two-step open-domain question answering approach by using a Reducer to extract relevant text segments and a Producer to generate both extractive answers and non-extractive classifications. In a context like ours, with limited labeled data, we show that a superior approach for strong performance within limited development time is to use a combination of a rule-based Reducer and a neural Producer. We study four representative tasks from the parole dataset. On all four, we improve extraction from the previous benchmark of 0.41–0.63 to 0.83–0.89 F1.</abstract>
<identifier type="citekey">hong-etal-2021-learning</identifier>
<identifier type="doi">10.18653/v1/2021.nllp-1.20</identifier>
<location>
<url>https://aclanthology.org/2021.nllp-1.20</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>190</start>
<end>204</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning from Limited Labels for Long Legal Dialogue
%A Hong, Jenny
%A Chong, Derek
%A Manning, Christopher
%Y Aletras, Nikolaos
%Y Androutsopoulos, Ion
%Y Barrett, Leslie
%Y Goanta, Catalina
%Y Preotiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F hong-etal-2021-learning
%X We study attempting to achieve high accuracy information extraction of case factors from a challenging dataset of parole hearings, which, compared to other legal NLP datasets, has longer texts, with fewer labels. On this corpus, existing work directly applying pretrained neural models has failed to extract all but a few relatively basic items with little improvement over rule-based extraction. We address two challenges posed by existing work: training on long documents and reasoning over complex speech patterns. We use a similar approach to the two-step open-domain question answering approach by using a Reducer to extract relevant text segments and a Producer to generate both extractive answers and non-extractive classifications. In a context like ours, with limited labeled data, we show that a superior approach for strong performance within limited development time is to use a combination of a rule-based Reducer and a neural Producer. We study four representative tasks from the parole dataset. On all four, we improve extraction from the previous benchmark of 0.41–0.63 to 0.83–0.89 F1.
%R 10.18653/v1/2021.nllp-1.20
%U https://aclanthology.org/2021.nllp-1.20
%U https://doi.org/10.18653/v1/2021.nllp-1.20
%P 190-204
Markdown (Informal)
[Learning from Limited Labels for Long Legal Dialogue](https://aclanthology.org/2021.nllp-1.20) (Hong et al., NLLP 2021)
ACL
- Jenny Hong, Derek Chong, and Christopher Manning. 2021. Learning from Limited Labels for Long Legal Dialogue. In Proceedings of the Natural Legal Language Processing Workshop 2021, pages 190–204, Punta Cana, Dominican Republic. Association for Computational Linguistics.