@inproceedings{tran-etal-2024-deberta,
title = "{D}e{BERT}a Beats Behemoths: A Comparative Analysis of Fine-Tuning, Prompting, and {PEFT} Approaches on {L}egal{L}ens{NER}",
author = "Tran, Hanh Thi Hong and
Chatterjee, Nishan and
Pollak, Senja and
Doucet, Antoine",
editor = "Aletras, Nikolaos and
Chalkidis, Ilias and
Barrett, Leslie and
Goan{\textcommabelow{t}}{\u{a}}, C{\u{a}}t{\u{a}}lina and
Preo{\textcommabelow{t}}iuc-Pietro, Daniel and
Spanakis, Gerasimos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2024",
month = nov,
year = "2024",
address = "Miami, FL, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.nllp-1.34",
pages = "371--380",
abstract = "This paper summarizes the participation of our team (Flawless Lawgic) in the legal named entity recognition (L-NER) task at LegalLens 2024: Detecting Legal Violations. Given possible unstructured texts (e.g., online media texts), we aim to identify legal violations by extracting legal entities such as {``}violation{''}, {``}violation by{''}, {``}violation on{''}, and {``}law{''}. This system-description paper discusses our approaches to address the task, empirically highlighting the performances of fine-tuning models from the Transformers family (e.g., RoBERTa and DeBERTa) against open-sourced LLMs (e.g., Llama, Mistral) with different tuning settings (e.g., LoRA, Supervised Fine-Tuning (SFT) and prompting strategies). Our best results, with a weighted F1 of 0.705 on the test set, show a 30 percentage points increase in F1 compared to the baseline and rank 2 on the leaderboard, leaving a marginal gap of only 0.4 percentage points lower than the top solution. Our solutions are available at github.com/honghanhh/lner.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tran-etal-2024-deberta">
<titleInfo>
<title>DeBERTa Beats Behemoths: A Comparative Analysis of Fine-Tuning, Prompting, and PEFT Approaches on LegalLensNER</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanh</namePart>
<namePart type="given">Thi</namePart>
<namePart type="given">Hong</namePart>
<namePart type="family">Tran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nishan</namePart>
<namePart type="family">Chatterjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Doucet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cătălina</namePart>
<namePart type="family">Goan\textcommabelowtă</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preo\textcommabelowtiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Spanakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, FL, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper summarizes the participation of our team (Flawless Lawgic) in the legal named entity recognition (L-NER) task at LegalLens 2024: Detecting Legal Violations. Given possible unstructured texts (e.g., online media texts), we aim to identify legal violations by extracting legal entities such as “violation”, “violation by”, “violation on”, and “law”. This system-description paper discusses our approaches to address the task, empirically highlighting the performances of fine-tuning models from the Transformers family (e.g., RoBERTa and DeBERTa) against open-sourced LLMs (e.g., Llama, Mistral) with different tuning settings (e.g., LoRA, Supervised Fine-Tuning (SFT) and prompting strategies). Our best results, with a weighted F1 of 0.705 on the test set, show a 30 percentage points increase in F1 compared to the baseline and rank 2 on the leaderboard, leaving a marginal gap of only 0.4 percentage points lower than the top solution. Our solutions are available at github.com/honghanhh/lner.</abstract>
<identifier type="citekey">tran-etal-2024-deberta</identifier>
<location>
<url>https://aclanthology.org/2024.nllp-1.34</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>371</start>
<end>380</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DeBERTa Beats Behemoths: A Comparative Analysis of Fine-Tuning, Prompting, and PEFT Approaches on LegalLensNER
%A Tran, Hanh Thi Hong
%A Chatterjee, Nishan
%A Pollak, Senja
%A Doucet, Antoine
%Y Aletras, Nikolaos
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Goan\textcommabelowtă, Cătălina
%Y Preo\textcommabelowtiuc-Pietro, Daniel
%Y Spanakis, Gerasimos
%S Proceedings of the Natural Legal Language Processing Workshop 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, FL, USA
%F tran-etal-2024-deberta
%X This paper summarizes the participation of our team (Flawless Lawgic) in the legal named entity recognition (L-NER) task at LegalLens 2024: Detecting Legal Violations. Given possible unstructured texts (e.g., online media texts), we aim to identify legal violations by extracting legal entities such as “violation”, “violation by”, “violation on”, and “law”. This system-description paper discusses our approaches to address the task, empirically highlighting the performances of fine-tuning models from the Transformers family (e.g., RoBERTa and DeBERTa) against open-sourced LLMs (e.g., Llama, Mistral) with different tuning settings (e.g., LoRA, Supervised Fine-Tuning (SFT) and prompting strategies). Our best results, with a weighted F1 of 0.705 on the test set, show a 30 percentage points increase in F1 compared to the baseline and rank 2 on the leaderboard, leaving a marginal gap of only 0.4 percentage points lower than the top solution. Our solutions are available at github.com/honghanhh/lner.
%U https://aclanthology.org/2024.nllp-1.34
%P 371-380
Markdown (Informal)
[DeBERTa Beats Behemoths: A Comparative Analysis of Fine-Tuning, Prompting, and PEFT Approaches on LegalLensNER](https://aclanthology.org/2024.nllp-1.34) (Tran et al., NLLP 2024)
ACL