@inproceedings{niklaus-etal-2023-automatic,
title = "Automatic Anonymization of {S}wiss Federal {S}upreme {C}ourt Rulings",
author = {Niklaus, Joel and
Mami{\'e}, Robin and
St{\"u}rmer, Matthias and
Brunner, Daniel and
Gygli, Marcel},
editor = "Preo{\textcommabelow{t}}iuc-Pietro, Daniel and
Goanta, Catalina and
Chalkidis, Ilias and
Barrett, Leslie and
Spanakis, Gerasimos and
Aletras, Nikolaos",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.nllp-1.16",
doi = "10.18653/v1/2023.nllp-1.16",
pages = "159--165",
abstract = "Releasing court decisions to the public relies on proper anonymization to protect all involved parties, where necessary. The Swiss Federal Supreme Court relies on an existing system that combines different traditional computational methods with human experts. In this work, we enhance the existing anonymization software using a large dataset annotated with entities to be anonymized. We compared BERT-based models with models pre-trained on in-domain data. Our results show that using in-domain data to pre-train the models further improves the F1-score by more than 5{\%} compared to existing models. Our work demonstrates that combining existing anonymization methods, such as regular expressions, with machine learning can further reduce manual labor and enhance automatic suggestions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="niklaus-etal-2023-automatic">
<titleInfo>
<title>Automatic Anonymization of Swiss Federal Supreme Court Rulings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Niklaus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Mamié</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Stürmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Brunner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcel</namePart>
<namePart type="family">Gygli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preoțiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerasimos</namePart>
<namePart type="family">Spanakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Releasing court decisions to the public relies on proper anonymization to protect all involved parties, where necessary. The Swiss Federal Supreme Court relies on an existing system that combines different traditional computational methods with human experts. In this work, we enhance the existing anonymization software using a large dataset annotated with entities to be anonymized. We compared BERT-based models with models pre-trained on in-domain data. Our results show that using in-domain data to pre-train the models further improves the F1-score by more than 5% compared to existing models. Our work demonstrates that combining existing anonymization methods, such as regular expressions, with machine learning can further reduce manual labor and enhance automatic suggestions.</abstract>
<identifier type="citekey">niklaus-etal-2023-automatic</identifier>
<identifier type="doi">10.18653/v1/2023.nllp-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.nllp-1.16</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>159</start>
<end>165</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Anonymization of Swiss Federal Supreme Court Rulings
%A Niklaus, Joel
%A Mamié, Robin
%A Stürmer, Matthias
%A Brunner, Daniel
%A Gygli, Marcel
%Y Preoțiuc-Pietro, Daniel
%Y Goanta, Catalina
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Spanakis, Gerasimos
%Y Aletras, Nikolaos
%S Proceedings of the Natural Legal Language Processing Workshop 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F niklaus-etal-2023-automatic
%X Releasing court decisions to the public relies on proper anonymization to protect all involved parties, where necessary. The Swiss Federal Supreme Court relies on an existing system that combines different traditional computational methods with human experts. In this work, we enhance the existing anonymization software using a large dataset annotated with entities to be anonymized. We compared BERT-based models with models pre-trained on in-domain data. Our results show that using in-domain data to pre-train the models further improves the F1-score by more than 5% compared to existing models. Our work demonstrates that combining existing anonymization methods, such as regular expressions, with machine learning can further reduce manual labor and enhance automatic suggestions.
%R 10.18653/v1/2023.nllp-1.16
%U https://aclanthology.org/2023.nllp-1.16
%U https://doi.org/10.18653/v1/2023.nllp-1.16
%P 159-165
Markdown (Informal)
[Automatic Anonymization of Swiss Federal Supreme Court Rulings](https://aclanthology.org/2023.nllp-1.16) (Niklaus et al., NLLP-WS 2023)
ACL