@inproceedings{paul-etal-2026-ilsic,
title = "{ILSIC}: Corpora for Identifying {I}ndian Legal Statutes from Queries by Laymen",
author = "Paul, Shounak and
Dogra, Raghav and
Goyal, Pawan and
Ghosh, Saptarshi",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.354/",
pages = "6725--6746",
ISBN = "979-8-89176-386-9",
abstract = "Legal Statute Identification (LSI) for a given situation is one of the most fundamental tasks in Legal NLP. This task has traditionally been modeled using facts from court judgments as input queries, due to their abundance. However, in practical settings, the input queries are likely to be informal and asked by laypersons, or non-professionals. While a few laypeople LSI datasets exist, there has been little research to explore the differences between court and laypeople data for LSI. In this work, we create ILSIC, a corpus of laypeople queries covering 500+ statutes from Indian law. Additionally, the corpus also contains court case judgements to enable researchers to effectively compare between court and laypeople data for LSI. We conducted extensive experiments on our corpus, including benchmarking over the laypeople dataset using zero and few-shot inference, retrieval-augmented generation and supervised fine-tuning. We observe that models trained purely on court judgements are ineffective during test on laypeople queries, while transfer learning from court to laypeople data can be beneficial in certain scenarios. We also conducted fine-grained analyses of our results in terms of categories of queries and frequency of statutes."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="paul-etal-2026-ilsic">
<titleInfo>
<title>ILSIC: Corpora for Identifying Indian Legal Statutes from Queries by Laymen</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shounak</namePart>
<namePart type="family">Paul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raghav</namePart>
<namePart type="family">Dogra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pawan</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saptarshi</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Legal Statute Identification (LSI) for a given situation is one of the most fundamental tasks in Legal NLP. This task has traditionally been modeled using facts from court judgments as input queries, due to their abundance. However, in practical settings, the input queries are likely to be informal and asked by laypersons, or non-professionals. While a few laypeople LSI datasets exist, there has been little research to explore the differences between court and laypeople data for LSI. In this work, we create ILSIC, a corpus of laypeople queries covering 500+ statutes from Indian law. Additionally, the corpus also contains court case judgements to enable researchers to effectively compare between court and laypeople data for LSI. We conducted extensive experiments on our corpus, including benchmarking over the laypeople dataset using zero and few-shot inference, retrieval-augmented generation and supervised fine-tuning. We observe that models trained purely on court judgements are ineffective during test on laypeople queries, while transfer learning from court to laypeople data can be beneficial in certain scenarios. We also conducted fine-grained analyses of our results in terms of categories of queries and frequency of statutes.</abstract>
<identifier type="citekey">paul-etal-2026-ilsic</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.354/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>6725</start>
<end>6746</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ILSIC: Corpora for Identifying Indian Legal Statutes from Queries by Laymen
%A Paul, Shounak
%A Dogra, Raghav
%A Goyal, Pawan
%A Ghosh, Saptarshi
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F paul-etal-2026-ilsic
%X Legal Statute Identification (LSI) for a given situation is one of the most fundamental tasks in Legal NLP. This task has traditionally been modeled using facts from court judgments as input queries, due to their abundance. However, in practical settings, the input queries are likely to be informal and asked by laypersons, or non-professionals. While a few laypeople LSI datasets exist, there has been little research to explore the differences between court and laypeople data for LSI. In this work, we create ILSIC, a corpus of laypeople queries covering 500+ statutes from Indian law. Additionally, the corpus also contains court case judgements to enable researchers to effectively compare between court and laypeople data for LSI. We conducted extensive experiments on our corpus, including benchmarking over the laypeople dataset using zero and few-shot inference, retrieval-augmented generation and supervised fine-tuning. We observe that models trained purely on court judgements are ineffective during test on laypeople queries, while transfer learning from court to laypeople data can be beneficial in certain scenarios. We also conducted fine-grained analyses of our results in terms of categories of queries and frequency of statutes.
%U https://aclanthology.org/2026.findings-eacl.354/
%P 6725-6746
Markdown (Informal)
[ILSIC: Corpora for Identifying Indian Legal Statutes from Queries by Laymen](https://aclanthology.org/2026.findings-eacl.354/) (Paul et al., Findings 2026)
ACL