@inproceedings{bocchi-etal-2024-kevlar,
title = "{KEVLAR}: The Complete Resource for {E}uro{V}oc Classification of Legal Documents",
author = "Bocchi, Lorenzo and
Casula, Camilla and
Palmero Aprosio, Alessio",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.9/",
pages = "66--73",
ISBN = "979-12-210-7060-6",
abstract = "The use of Machine Learning and Artificial Intelligence in the Public Administration (PA) has increased in the last years. In particular, recent guidelines proposed by various governments for the classification of documents released by the PA suggest to use the EuroVoc thesaurus. In this paper, we present KEVLAR, an all-in-one solution for performing the above-mentioned task on acts belonging to the Public Administration. First, we create a collection of 8 million documents in 24 languages, tagged with EuroVoc labels, taken from EUR-Lex, the web portal of the European Union legislation. Then, we train different pre-trained BERT-based models, comparing the performance of base models with domain-specific and multilingual ones. We release the corpus, the best-performing models, and a Docker image containing the source code of the trainer, the REST API, and the web interface. This image can be employed out-of-the-box for document classification."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bocchi-etal-2024-kevlar">
<titleInfo>
<title>KEVLAR: The Complete Resource for EuroVoc Classification of Legal Documents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lorenzo</namePart>
<namePart type="family">Bocchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Camilla</namePart>
<namePart type="family">Casula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessio</namePart>
<namePart type="family">Palmero Aprosio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>The use of Machine Learning and Artificial Intelligence in the Public Administration (PA) has increased in the last years. In particular, recent guidelines proposed by various governments for the classification of documents released by the PA suggest to use the EuroVoc thesaurus. In this paper, we present KEVLAR, an all-in-one solution for performing the above-mentioned task on acts belonging to the Public Administration. First, we create a collection of 8 million documents in 24 languages, tagged with EuroVoc labels, taken from EUR-Lex, the web portal of the European Union legislation. Then, we train different pre-trained BERT-based models, comparing the performance of base models with domain-specific and multilingual ones. We release the corpus, the best-performing models, and a Docker image containing the source code of the trainer, the REST API, and the web interface. This image can be employed out-of-the-box for document classification.</abstract>
<identifier type="citekey">bocchi-etal-2024-kevlar</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.9/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>66</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T KEVLAR: The Complete Resource for EuroVoc Classification of Legal Documents
%A Bocchi, Lorenzo
%A Casula, Camilla
%A Palmero Aprosio, Alessio
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F bocchi-etal-2024-kevlar
%X The use of Machine Learning and Artificial Intelligence in the Public Administration (PA) has increased in the last years. In particular, recent guidelines proposed by various governments for the classification of documents released by the PA suggest to use the EuroVoc thesaurus. In this paper, we present KEVLAR, an all-in-one solution for performing the above-mentioned task on acts belonging to the Public Administration. First, we create a collection of 8 million documents in 24 languages, tagged with EuroVoc labels, taken from EUR-Lex, the web portal of the European Union legislation. Then, we train different pre-trained BERT-based models, comparing the performance of base models with domain-specific and multilingual ones. We release the corpus, the best-performing models, and a Docker image containing the source code of the trainer, the REST API, and the web interface. This image can be employed out-of-the-box for document classification.
%U https://aclanthology.org/2024.clicit-1.9/
%P 66-73
Markdown (Informal)
[KEVLAR: The Complete Resource for EuroVoc Classification of Legal Documents](https://aclanthology.org/2024.clicit-1.9/) (Bocchi et al., CLiC-it 2024)
ACL