@inproceedings{sargeant-etal-2025-detecting,
title = "Detecting Legal Citations in {U}nited {K}ingdom Court Judgments",
author = {Sargeant, Holli and
{\"O}stling, Andreas and
Magnusson, M{\r{a}}ns},
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.1361/",
pages = "26798--26824",
ISBN = "979-8-89176-332-6",
abstract = "Legal citation detection in court judgments underpins reliable precedent mapping, citation analytics, and document retrieval. Extracting references to legislation and case law in the United Kingdom is especially challenging: citation styles have evolved over centuries, and judgments routinely cite foreign or historical authorities. We conduct the first systematic comparison of three modelling paradigms on this task using the Cambridge Law Corpus: (i) rule{-}based regular expressions; (ii) transformer-based encoders (BERT, RoBERTa, LEGAL{-}BERT, ModernBERT); and (iii) large language models (GPT{-}4.1). We produced a gold{-}standard high-quality corpus of 190 court judgments containing 45,179 fine-grained annotations for UK and non-UK legislation and case references. ModernBERT achieves a macro-averaged F1 of 93.3{\%}, only marginally ahead of the other encoder-only models, yet significantly outperforming the strongest regular-expression baseline (35.42{\%} F1) and GPT-4.1 (76.57{\%} F1)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sargeant-etal-2025-detecting">
<titleInfo>
<title>Detecting Legal Citations in United Kingdom Court Judgments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Holli</namePart>
<namePart type="family">Sargeant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Östling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Måns</namePart>
<namePart type="family">Magnusson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Legal citation detection in court judgments underpins reliable precedent mapping, citation analytics, and document retrieval. Extracting references to legislation and case law in the United Kingdom is especially challenging: citation styles have evolved over centuries, and judgments routinely cite foreign or historical authorities. We conduct the first systematic comparison of three modelling paradigms on this task using the Cambridge Law Corpus: (i) rule-based regular expressions; (ii) transformer-based encoders (BERT, RoBERTa, LEGAL-BERT, ModernBERT); and (iii) large language models (GPT-4.1). We produced a gold-standard high-quality corpus of 190 court judgments containing 45,179 fine-grained annotations for UK and non-UK legislation and case references. ModernBERT achieves a macro-averaged F1 of 93.3%, only marginally ahead of the other encoder-only models, yet significantly outperforming the strongest regular-expression baseline (35.42% F1) and GPT-4.1 (76.57% F1).</abstract>
<identifier type="citekey">sargeant-etal-2025-detecting</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.1361/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>26798</start>
<end>26824</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Legal Citations in United Kingdom Court Judgments
%A Sargeant, Holli
%A Östling, Andreas
%A Magnusson, Måns
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F sargeant-etal-2025-detecting
%X Legal citation detection in court judgments underpins reliable precedent mapping, citation analytics, and document retrieval. Extracting references to legislation and case law in the United Kingdom is especially challenging: citation styles have evolved over centuries, and judgments routinely cite foreign or historical authorities. We conduct the first systematic comparison of three modelling paradigms on this task using the Cambridge Law Corpus: (i) rule-based regular expressions; (ii) transformer-based encoders (BERT, RoBERTa, LEGAL-BERT, ModernBERT); and (iii) large language models (GPT-4.1). We produced a gold-standard high-quality corpus of 190 court judgments containing 45,179 fine-grained annotations for UK and non-UK legislation and case references. ModernBERT achieves a macro-averaged F1 of 93.3%, only marginally ahead of the other encoder-only models, yet significantly outperforming the strongest regular-expression baseline (35.42% F1) and GPT-4.1 (76.57% F1).
%U https://aclanthology.org/2025.emnlp-main.1361/
%P 26798-26824
Markdown (Informal)
[Detecting Legal Citations in United Kingdom Court Judgments](https://aclanthology.org/2025.emnlp-main.1361/) (Sargeant et al., EMNLP 2025)
ACL