@inproceedings{bayer-etal-2025-regnlp,
  title     = {A {REGNLP} Framework: Developing Retrieval-Augmented Generation for Regulatory Document Analysis},
  author    = {Bayer, Ozan and
               Ulu, Elif Nehir and
               Sark{\i}n, Yasemin and
               S{\"u}t{\c{c}}{\"u}, Ekrem and
               {\c{C}}elik, Defne Buse and
               Karamanl{\i}o{\u{g}}lu, Alper and
               Karakaya, {\.I}smail and
               Demirel, Berkan},
  editor    = {Gokhan, Tuba and
               Wang, Kexin and
               Gurevych, Iryna and
               Briscoe, Ted},
  booktitle = {Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)},
  month     = jan,
  year      = {2025},
  address   = {Abu Dhabi, UAE},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.regnlp-1.15/},
  pages     = {97--101},
  abstract  = {This study presents the development of a Retrieval-Augmented Generation (RAG) framework tailored for analyzing regulatory documents from the Abu Dhabi Global Markets (ADGM). The methodology encompasses comprehensive data preprocessing, including extraction, cleaning, and compression of documents, as well as the organization of the ObliQA dataset. The embedding model is utilized for generating embeddings during the retrieval phase, facilitated by the txtai library for managing embeddings and streamlining testing. The training process incorporated innovative strategies such as duplicate recognition, dropout implementation, pooling adjustments, and label modifications to enhance retrieval performance. Hyperparameter tuning further refined the retrieval component, with improvements validated using the recall@10 metric, which measures the proportion of relevant passages among the top-10 results. The refined retrieval component effectively identifies pertinent passages within regulatory documents, expediting information access and supporting compliance efforts.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record (citekey bayer-etal-2025-regnlp): authors are listed on the record itself; editors, publisher and place are listed on the host relatedItem. -->
  <mods ID="bayer-etal-2025-regnlp">
    <titleInfo>
      <title>A REGNLP Framework: Developing Retrieval-Augmented Generation for Regulatory Document Analysis</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ozan</namePart>
      <namePart type="family">Bayer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elif</namePart>
      <namePart type="given">Nehir</namePart>
      <namePart type="family">Ulu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yasemin</namePart>
      <namePart type="family">Sarkın</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ekrem</namePart>
      <namePart type="family">Sütçü</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Defne</namePart>
      <namePart type="given">Buse</namePart>
      <namePart type="family">Çelik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alper</namePart>
      <namePart type="family">Karamanlıoğlu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">İsmail</namePart>
      <namePart type="family">Karakaya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Berkan</namePart>
      <namePart type="family">Demirel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Tuba</namePart>
        <namePart type="family">Gokhan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kexin</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iryna</namePart>
        <namePart type="family">Gurevych</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ted</namePart>
        <namePart type="family">Briscoe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This study presents the development of a Retrieval-Augmented Generation (RAG) framework tailored for analyzing regulatory documents from the Abu Dhabi Global Markets (ADGM). The methodology encompasses comprehensive data preprocessing, including extraction, cleaning, and compression of documents, as well as the organization of the ObliQA dataset. The embedding model is utilized for generating embeddings during the retrieval phase, facilitated by the txtai library for managing embeddings and streamlining testing. The training process incorporated innovative strategies such as duplicate recognition, dropout implementation, pooling adjustments, and label modifications to enhance retrieval performance. Hyperparameter tuning further refined the retrieval component, with improvements validated using the recall@10 metric, which measures the proportion of relevant passages among the top-10 results. The refined retrieval component effectively identifies pertinent passages within regulatory documents, expediting information access and supporting compliance efforts.</abstract>
    <identifier type="citekey">bayer-etal-2025-regnlp</identifier>
    <location>
      <url>https://aclanthology.org/2025.regnlp-1.15/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>97</start>
        <end>101</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A REGNLP Framework: Developing Retrieval-Augmented Generation for Regulatory Document Analysis
%A Bayer, Ozan
%A Ulu, Elif Nehir
%A Sarkın, Yasemin
%A Sütçü, Ekrem
%A Çelik, Defne Buse
%A Karamanlıoğlu, Alper
%A Karakaya, İsmail
%A Demirel, Berkan
%Y Gokhan, Tuba
%Y Wang, Kexin
%Y Gurevych, Iryna
%Y Briscoe, Ted
%S Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025)
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F bayer-etal-2025-regnlp
%X This study presents the development of a Retrieval-Augmented Generation (RAG) framework tailored for analyzing regulatory documents from the Abu Dhabi Global Markets (ADGM). The methodology encompasses comprehensive data preprocessing, including extraction, cleaning, and compression of documents, as well as the organization of the ObliQA dataset. The embedding model is utilized for generating embeddings during the retrieval phase, facilitated by the txtai library for managing embeddings and streamlining testing. The training process incorporated innovative strategies such as duplicate recognition, dropout implementation, pooling adjustments, and label modifications to enhance retrieval performance. Hyperparameter tuning further refined the retrieval component, with improvements validated using the recall@10 metric, which measures the proportion of relevant passages among the top-10 results. The refined retrieval component effectively identifies pertinent passages within regulatory documents, expediting information access and supporting compliance efforts.
%U https://aclanthology.org/2025.regnlp-1.15/
%P 97-101
Markdown (Informal)
[A REGNLP Framework: Developing Retrieval-Augmented Generation for Regulatory Document Analysis](https://aclanthology.org/2025.regnlp-1.15/) (Bayer et al., RegNLP 2025)
ACL
- Ozan Bayer, Elif Nehir Ulu, Yasemin Sarkın, Ekrem Sütçü, Defne Buse Çelik, Alper Karamanlıoğlu, İsmail Karakaya, and Berkan Demirel. 2025. A REGNLP Framework: Developing Retrieval-Augmented Generation for Regulatory Document Analysis. In Proceedings of the 1st Regulatory NLP Workshop (RegNLP 2025), pages 97–101, Abu Dhabi, UAE. Association for Computational Linguistics.