@inproceedings{wrzalik-krechel-2021-gerdalir,
title = "{G}er{D}a{LIR}: A {G}erman Dataset for Legal Information Retrieval",
author = "Wrzalik, Marco and
Krechel, Dirk",
editor = "Aletras, Nikolaos and
Androutsopoulos, Ion and
Barrett, Leslie and
Goanta, Catalina and
Preotiuc-Pietro, Daniel",
booktitle = "Proceedings of the Natural Legal Language Processing Workshop 2021",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.nllp-1.13",
doi = "10.18653/v1/2021.nllp-1.13",
pages = "123--128",
abstract = "We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wrzalik-krechel-2021-gerdalir">
<titleInfo>
<title>GerDaLIR: A German Dataset for Legal Information Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Wrzalik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Krechel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Natural Legal Language Processing Workshop 2021</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ion</namePart>
<namePart type="family">Androutsopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leslie</namePart>
<namePart type="family">Barrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catalina</namePart>
<namePart type="family">Goanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Preotiuc-Pietro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.</abstract>
<identifier type="citekey">wrzalik-krechel-2021-gerdalir</identifier>
<identifier type="doi">10.18653/v1/2021.nllp-1.13</identifier>
<location>
<url>https://aclanthology.org/2021.nllp-1.13</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>123</start>
<end>128</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GerDaLIR: A German Dataset for Legal Information Retrieval
%A Wrzalik, Marco
%A Krechel, Dirk
%Y Aletras, Nikolaos
%Y Androutsopoulos, Ion
%Y Barrett, Leslie
%Y Goanta, Catalina
%Y Preotiuc-Pietro, Daniel
%S Proceedings of the Natural Legal Language Processing Workshop 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F wrzalik-krechel-2021-gerdalir
%X We present GerDaLIR, a German Dataset for Legal Information Retrieval based on case documents from the open legal information platform Open Legal Data. The dataset consists of 123K queries, each labelled with at least one relevant document in a collection of 131K case documents. We conduct several baseline experiments including BM25 and a state-of-the-art neural re-ranker. With our dataset, we aim to provide a standardized benchmark for German LIR and promote open research in this area. Beyond that, our dataset comprises sufficient training data to be used as a downstream task for German or multilingual language models.
%R 10.18653/v1/2021.nllp-1.13
%U https://aclanthology.org/2021.nllp-1.13
%U https://doi.org/10.18653/v1/2021.nllp-1.13
%P 123-128
Markdown (Informal)
[GerDaLIR: A German Dataset for Legal Information Retrieval](https://aclanthology.org/2021.nllp-1.13) (Wrzalik & Krechel, NLLP 2021)
ACL