% Conference-paper record for the JU-NLP-PG system description published in
% the RAG4Reports 2026 workshop proceedings (canonical page: see the url field).
% Acronyms and mixed-case names are brace-protected as whole words so that
% bibliography styles applying sentence casing cannot lowercase them.
@inproceedings{chatterjee-das-2026-ju,
  title     = {{JU-NLP-PG} at {RAG4Reports} 2026: Memory-Efficient Multilingual Report Generation with 4-bit Quantized {LLMs}},
  author    = {Chatterjee, Swayam and
               Das, Dipankar},
  editor    = {Yang, Eugene and
               Lawrie, Dawn and
               MacAvaney, Sean and
               Mayfield, James and
               Soldaini, Luca and
               Yates, Andrew},
  booktitle = {Proceedings of the 1st Workshop on Multilingual Report Generation via Retrieval Augmented Generation ({RAG4Reports} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.rag4reports-1.16/},
  pages     = {108--112},
  isbn      = {979-8-89176-417-0},
  abstract  = {In the present article, we have described our system developed for participating in Task B on Multilingual Report Generation under RAG4Reports 2026 at ACL 2026 with submitted run ID ju{\_}nlp{\_}pg. The problem statement is given a report request in English, the system retrieves relevant passages from a four million multilingual document corpus (English, Chinese, Russian, Arabic) and generates a grounded, citation-bearing report. Our core challenge was how to fit a large retrieval corpus along with a capable generative model on a two-GPU node with {\ensuremath{\approx}}29 GB RAM. We addressed the challenge employing three different techniques: (1) 4-bit NF4 quantization, shrinking the LLM from {\ensuremath{\approx}}14 GB to {\ensuremath{\approx}}4 GB; (2) memory-mapped, chunked FAISS index construction over pre-computed multilingual-e5-large embeddings; and (3) strict model-loading order to prevent heap fragmentation. On the other hand, the reports are structured around topic nuggets to directly target the Auto-ARGUE evaluation signal.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chatterjee-das-2026-ju">
<titleInfo>
<title>JU-NLP-PG at RAG4Reports 2026: Memory-Efficient Multilingual Report Generation with 4-bit Quantized LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Swayam</namePart>
<namePart type="family">Chatterjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipankar</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Multilingual Report Generation via Retrieval Augmented Generation (RAG4Reports 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eugene</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dawn</namePart>
<namePart type="family">Lawrie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">MacAvaney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Mayfield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luca</namePart>
<namePart type="family">Soldaini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Yates</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, CA, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-417-0</identifier>
</relatedItem>
<abstract>In the present article, we have described our system developed for participating in Task B on Multilingual Report Generation under RAG4Reports 2026 at ACL 2026 with submitted run ID ju_nlp_pg. The problem statement is given a report request in English, the system retrieves relevant passages from a four million multilingual document corpus (English, Chinese, Russian, Arabic) and generates a grounded, citation-bearing report. Our core challenge was how to fit a large retrieval corpus along with a capable generative model on a two-GPU node with ≈29 GB RAM. We addressed the challenge employing three different techniques: (1) 4-bit NF4 quantization, shrinking the LLM from ≈14 GB to ≈4 GB; (2) memory-mapped, chunked FAISS index construction over pre-computed multilingual-e5-large embeddings; and (3) strict model-loading order to prevent heap fragmentation. On the other hand, the reports are structured around topic nuggets to directly target the Auto-ARGUE evaluation signal.</abstract>
<identifier type="citekey">chatterjee-das-2026-ju</identifier>
<location>
<url>https://aclanthology.org/2026.rag4reports-1.16/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>108</start>
<end>112</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JU-NLP-PG at RAG4Reports 2026: Memory-Efficient Multilingual Report Generation with 4-bit Quantized LLMs
%A Chatterjee, Swayam
%A Das, Dipankar
%Y Yang, Eugene
%Y Lawrie, Dawn
%Y MacAvaney, Sean
%Y Mayfield, James
%Y Soldaini, Luca
%Y Yates, Andrew
%S Proceedings of the 1st Workshop on Multilingual Report Generation via Retrieval Augmented Generation (RAG4Reports 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA, USA
%@ 979-8-89176-417-0
%F chatterjee-das-2026-ju
%X In the present article, we have described our system developed for participating in Task B on Multilingual Report Generation under RAG4Reports 2026 at ACL 2026 with submitted run ID ju_nlp_pg. The problem statement is given a report request in English, the system retrieves relevant passages from a four million multilingual document corpus (English, Chinese, Russian, Arabic) and generates a grounded, citation-bearing report. Our core challenge was how to fit a large retrieval corpus along with a capable generative model on a two-GPU node with ≈29 GB RAM. We addressed the challenge employing three different techniques: (1) 4-bit NF4 quantization, shrinking the LLM from ≈14 GB to ≈4 GB; (2) memory-mapped, chunked FAISS index construction over pre-computed multilingual-e5-large embeddings; and (3) strict model-loading order to prevent heap fragmentation. On the other hand, the reports are structured around topic nuggets to directly target the Auto-ARGUE evaluation signal.
%U https://aclanthology.org/2026.rag4reports-1.16/
%P 108-112
Markdown (Informal)
[JU-NLP-PG at RAG4Reports 2026: Memory-Efficient Multilingual Report Generation with 4-bit Quantized LLMs](https://aclanthology.org/2026.rag4reports-1.16/) (Chatterjee & Das, RAG4Reports 2026)
ACL