BibTeX
@inproceedings{douka-etal-2021-juribert,
title = "{J}uri{BERT}: A Masked-Language Model Adaptation for {F}rench Legal Text",
author = "Douka, Stella and
Abdine, Hadi and
Vazirgiannis, Michalis and
El Hamdani, Rajaa and
Restrepo Amariles, David",
editor = "Aletras, Nikolaos and
Androutsopoulos, Ion and
Barrett, Leslie and
Goanta, Catalina and
Preotiuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nllp-1.9",
doi = "10.18653/v1/2021.nllp-1.9",
pages = "95--101",
abstract = "Language models have proven to be very useful when adapted to specific domains. Nonetheless, little research has been done on the adaptation of domain-specific BERT models in the French language. In this paper, we focus on creating a language model adapted to French legal text with the goal of helping law professionals. We conclude that some specific tasks do not benefit from generic language models pre-trained on large amounts of data. We explore the use of smaller architectures in domain-specific sub-languages and their benefits for French legal text. We prove that domain-specific pre-trained models can perform better than their equivalent generalised ones in the legal domain. Finally, we release JuriBERT, a new set of BERT models adapted to the French legal domain.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="douka-etal-2021-juribert">
    <titleInfo>
      <title>JuriBERT: A Masked-Language Model Adaptation for French Legal Text</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Stella</namePart>
      <namePart type="family">Douka</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hadi</namePart>
      <namePart type="family">Abdine</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Michalis</namePart>
      <namePart type="family">Vazirgiannis</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rajaa</namePart>
      <namePart type="family">El Hamdani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Restrepo Amariles</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Natural Legal Language Processing Workshop 2021</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nikolaos</namePart>
        <namePart type="family">Aletras</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ion</namePart>
        <namePart type="family">Androutsopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leslie</namePart>
        <namePart type="family">Barrett</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Catalina</namePart>
        <namePart type="family">Goanta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Preotiuc-Pietro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Language models have proven to be very useful when adapted to specific domains. Nonetheless, little research has been done on the adaptation of domain-specific BERT models in the French language. In this paper, we focus on creating a language model adapted to French legal text with the goal of helping law professionals. We conclude that some specific tasks do not benefit from generic language models pre-trained on large amounts of data. We explore the use of smaller architectures in domain-specific sub-languages and their benefits for French legal text. We prove that domain-specific pre-trained models can perform better than their equivalent generalised ones in the legal domain. Finally, we release JuriBERT, a new set of BERT models adapted to the French legal domain.</abstract>
    <identifier type="citekey">douka-etal-2021-juribert</identifier>
    <identifier type="doi">10.18653/v1/2021.nllp-1.9</identifier>
    <location>
      <url>https://aclanthology.org/2021.nllp-1.9</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>95</start>
        <end>101</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T JuriBERT: A Masked-Language Model Adaptation for French Legal Text
%A Douka, Stella
%A Abdine, Hadi
%A Vazirgiannis, Michalis
%A El Hamdani, Rajaa
%A Restrepo Amariles, David
%Y Aletras, Nikolaos
%Y Androutsopoulos, Ion
%Y Barrett, Leslie
%Y Goanta, Catalina
%Y Preotiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F douka-etal-2021-juribert
%X Language models have proven to be very useful when adapted to specific domains. Nonetheless, little research has been done on the adaptation of domain-specific BERT models in the French language. In this paper, we focus on creating a language model adapted to French legal text with the goal of helping law professionals. We conclude that some specific tasks do not benefit from generic language models pre-trained on large amounts of data. We explore the use of smaller architectures in domain-specific sub-languages and their benefits for French legal text. We prove that domain-specific pre-trained models can perform better than their equivalent generalised ones in the legal domain. Finally, we release JuriBERT, a new set of BERT models adapted to the French legal domain.
%R 10.18653/v1/2021.nllp-1.9
%U https://aclanthology.org/2021.nllp-1.9
%U https://doi.org/10.18653/v1/2021.nllp-1.9
%P 95-101

Markdown (Informal)
[JuriBERT: A Masked-Language Model Adaptation for French Legal Text](https://aclanthology.org/2021.nllp-1.9) (Douka et al., NLLP 2021)

ACL
Stella Douka, Hadi Abdine, Michalis Vazirgiannis, Rajaa El Hamdani, and David Restrepo Amariles. 2021. JuriBERT: A Masked-Language Model Adaptation for French Legal Text. In Proceedings of the Natural Legal Language Processing Workshop 2021, pages 95–101, Punta Cana, Dominican Republic. Association for Computational Linguistics.
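
The abstract above describes masked-language-model (MLM) pretraining adapted to French legal text. As a rough illustration of that general technique only (not the authors' training code or released models), here is a minimal sketch using the Hugging Face transformers and datasets libraries; the corpus file, base checkpoint, and hyperparameters are placeholder assumptions, and it continues pretraining from a generic French checkpoint rather than reproducing the paper's setup.

```python
# Minimal sketch of MLM domain adaptation on a legal-text corpus.
# NOT the authors' code: the corpus path, checkpoint choice, and
# hyperparameters below are illustrative assumptions.
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForMaskedLM,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

# Placeholder: a plain-text corpus of French legal documents, one per line.
corpus = load_dataset("text", data_files={"train": "french_legal_corpus.txt"})

# A generic French checkpoint (CamemBERT is the generic model the paper
# compares against) serves as the starting point for this sketch.
tokenizer = AutoTokenizer.from_pretrained("camembert-base")
model = AutoModelForMaskedLM.from_pretrained("camembert-base")

def tokenize(batch):
    return tokenizer(batch["text"], truncation=True, max_length=512)

tokenized = corpus["train"].map(tokenize, batched=True, remove_columns=["text"])

# The collator applies BERT-style random token masking for the MLM objective.
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)

args = TrainingArguments(
    output_dir="juribert-sketch",  # placeholder name
    per_device_train_batch_size=8,
    num_train_epochs=1,
    logging_steps=500,
)

Trainer(model=model, args=args, train_dataset=tokenized, data_collator=collator).train()
```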