@inproceedings{van-hofslot-etal-2022-automatic,
title = "Automatic Classification of Legal Violations in Cookie Banner Texts",
author = "Van Hofslot, Marieke and
Akdag Salah, Almila and
Gatt, Albert and
Santos, Cristiana",
editor = "Aletras, Nikolaos and
Chalkidis, Ilias and
Barrett, Leslie and
Goan{\textcommabelow{t}}{\u{a}}, C{\u{a}}t{\u{a}}lina and
Preo{\textcommabelow{t}}iuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nllp-1.27",
doi = "10.18653/v1/2022.nllp-1.27",
pages = "287--295",
abstract = "Cookie banners are designed to request consent from website visitors for their personal data. Recent research suggest that a high percentage of cookie banners violate legal regulations as defined by the General Data Protection Regulation (GDPR) and the ePrivacy Directive. In this paper, we focus on language used in these cookie banners, and whether these violations can be automatically detected, or not. We make use of a small cookie banner dataset that is annotated by five experts for legal violations and test it with state of the art classification models, namely BERT, LEGAL-BERT, BART in a zero-shot setting and BERT with LIWC embeddings. Our results show that none of the models outperform the others in all classes, but in general, BERT and LEGAL-BERT provide the highest accuracy results (70{\%}-97{\%}). However, they are influenced by the small size and the unbalanced distributions in the dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-hofslot-etal-2022-automatic">
<titleInfo>
<title>Automatic Classification of Legal Violations in Cookie Banner Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marieke</namePart>
<namePart type="family">Van Hofslot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Almila</namePart>
<namePart type="family">Akdag Salah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Gatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristiana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilias</namePart>
<namePart type="family">Chalkidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cătălina</namePart>
<namePart type="family">Goan\textcommabelowtă</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preo\textcommabelowtiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cookie banners are designed to request consent from website visitors for their personal data. Recent research suggest that a high percentage of cookie banners violate legal regulations as defined by the General Data Protection Regulation (GDPR) and the ePrivacy Directive. In this paper, we focus on language used in these cookie banners, and whether these violations can be automatically detected, or not. We make use of a small cookie banner dataset that is annotated by five experts for legal violations and test it with state of the art classification models, namely BERT, LEGAL-BERT, BART in a zero-shot setting and BERT with LIWC embeddings. Our results show that none of the models outperform the others in all classes, but in general, BERT and LEGAL-BERT provide the highest accuracy results (70%-97%). However, they are influenced by the small size and the unbalanced distributions in the dataset.</abstract>
<identifier type="citekey">van-hofslot-etal-2022-automatic</identifier>
<identifier type="doi">10.18653/v1/2022.nllp-1.27</identifier>
<location>
<url>https://aclanthology.org/2022.nllp-1.27</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>287</start>
<end>295</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Classification of Legal Violations in Cookie Banner Texts
%A Van Hofslot, Marieke
%A Akdag Salah, Almila
%A Gatt, Albert
%A Santos, Cristiana
%Y Aletras, Nikolaos
%Y Chalkidis, Ilias
%Y Barrett, Leslie
%Y Goan\textcommabelowtă, Cătălina
%Y Preo\textcommabelowtiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F van-hofslot-etal-2022-automatic
%X Cookie banners are designed to request consent from website visitors for their personal data. Recent research suggest that a high percentage of cookie banners violate legal regulations as defined by the General Data Protection Regulation (GDPR) and the ePrivacy Directive. In this paper, we focus on language used in these cookie banners, and whether these violations can be automatically detected, or not. We make use of a small cookie banner dataset that is annotated by five experts for legal violations and test it with state of the art classification models, namely BERT, LEGAL-BERT, BART in a zero-shot setting and BERT with LIWC embeddings. Our results show that none of the models outperform the others in all classes, but in general, BERT and LEGAL-BERT provide the highest accuracy results (70%-97%). However, they are influenced by the small size and the unbalanced distributions in the dataset.
%R 10.18653/v1/2022.nllp-1.27
%U https://aclanthology.org/2022.nllp-1.27
%U https://doi.org/10.18653/v1/2022.nllp-1.27
%P 287-295
Markdown (Informal)
[Automatic Classification of Legal Violations in Cookie Banner Texts](https://aclanthology.org/2022.nllp-1.27) (Van Hofslot et al., NLLP 2022)
ACL