@inproceedings{bajpai-etal-2025-hg,
title = "{HG}-{I}nsight{L}og: Context Prioritization and Reduction for Question Answering with Non-Natural Language Construct Log Data",
author = "Bajpai, Supriya and
Gopal, Athira and
Harjpal, Chandrakant and
Kumar, Niraj",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1214/",
doi = "10.18653/v1/2025.findings-acl.1214",
pages = "23679--23695",
ISBN = "979-8-89176-256-5",
abstract = "Modern IT systems generate vast amounts of log data, which pose challenges for Large Language Models (LLMs) due to their large size, irrelevant entries, and non-Natural Language (non-NL) construct (e.g., domain-specific jargon, error codes, file paths, and abbreviations). Traditional methods like Retrieval-Augmented Generation (RAG) and GraphRAG fail to preserve temporal sequences, handle non-NL for context and entities extraction, and dynamically prioritize query-relevant context. To address these limitations, we propose HG-InsightLog, a novel framework that constructs a multi-entity temporal hypergraph representing log attribute-value pair as nodes and connecting them with hyperedges, capturing critical connections in the data. HG-InsightLog introduces a multi-step query personalization mechanism enhancing the Personalized PageRank algorithm to rank hyperedges based on query relevance and contextual centrality to priortize critical connections. Top ranked hyperedges are extracted and converted back into log formats preserving temporal order and reducing context. Experimental results across multiple datasets demonstrate its superiority over existing methods, enhancing factual, causal, and analytical reasoning. Our approach enables smaller LLMs like LLaMA-8B to perform effective log-based QA. Being model-agnostic and training-free, it scales with evolving open-source LLMs without relying on proprietary systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bajpai-etal-2025-hg">
<titleInfo>
<title>HG-InsightLog: Context Prioritization and Reduction for Question Answering with Non-Natural Language Construct Log Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Supriya</namePart>
<namePart type="family">Bajpai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Athira</namePart>
<namePart type="family">Gopal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chandrakant</namePart>
<namePart type="family">Harjpal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Niraj</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Modern IT systems generate vast amounts of log data, which pose challenges for Large Language Models (LLMs) due to their large size, irrelevant entries, and non-Natural Language (non-NL) construct (e.g., domain-specific jargon, error codes, file paths, and abbreviations). Traditional methods like Retrieval-Augmented Generation (RAG) and GraphRAG fail to preserve temporal sequences, handle non-NL for context and entities extraction, and dynamically prioritize query-relevant context. To address these limitations, we propose HG-InsightLog, a novel framework that constructs a multi-entity temporal hypergraph representing log attribute-value pair as nodes and connecting them with hyperedges, capturing critical connections in the data. HG-InsightLog introduces a multi-step query personalization mechanism enhancing the Personalized PageRank algorithm to rank hyperedges based on query relevance and contextual centrality to priortize critical connections. Top ranked hyperedges are extracted and converted back into log formats preserving temporal order and reducing context. Experimental results across multiple datasets demonstrate its superiority over existing methods, enhancing factual, causal, and analytical reasoning. Our approach enables smaller LLMs like LLaMA-8B to perform effective log-based QA. Being model-agnostic and training-free, it scales with evolving open-source LLMs without relying on proprietary systems.</abstract>
<identifier type="citekey">bajpai-etal-2025-hg</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1214</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1214/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>23679</start>
<end>23695</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HG-InsightLog: Context Prioritization and Reduction for Question Answering with Non-Natural Language Construct Log Data
%A Bajpai, Supriya
%A Gopal, Athira
%A Harjpal, Chandrakant
%A Kumar, Niraj
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F bajpai-etal-2025-hg
%X Modern IT systems generate vast amounts of log data, which pose challenges for Large Language Models (LLMs) due to their large size, irrelevant entries, and non-Natural Language (non-NL) constructs (e.g., domain-specific jargon, error codes, file paths, and abbreviations). Traditional methods like Retrieval-Augmented Generation (RAG) and GraphRAG fail to preserve temporal sequences, handle non-NL constructs for context and entity extraction, and dynamically prioritize query-relevant context. To address these limitations, we propose HG-InsightLog, a novel framework that constructs a multi-entity temporal hypergraph, representing log attribute-value pairs as nodes and connecting them with hyperedges that capture critical connections in the data. HG-InsightLog introduces a multi-step query personalization mechanism that enhances the Personalized PageRank algorithm to rank hyperedges by query relevance and contextual centrality, prioritizing critical connections. Top-ranked hyperedges are extracted and converted back into log format, preserving temporal order and reducing context. Experimental results across multiple datasets demonstrate its superiority over existing methods, enhancing factual, causal, and analytical reasoning. Our approach enables smaller LLMs like LLaMA-8B to perform effective log-based QA. Being model-agnostic and training-free, it scales with evolving open-source LLMs without relying on proprietary systems.
%R 10.18653/v1/2025.findings-acl.1214
%U https://aclanthology.org/2025.findings-acl.1214/
%U https://doi.org/10.18653/v1/2025.findings-acl.1214
%P 23679-23695
Markdown (Informal)
[HG-InsightLog: Context Prioritization and Reduction for Question Answering with Non-Natural Language Construct Log Data](https://aclanthology.org/2025.findings-acl.1214/) (Bajpai et al., Findings 2025)
ACL
Supriya Bajpai, Athira Gopal, Chandrakant Harjpal, and Niraj Kumar. 2025. [HG-InsightLog: Context Prioritization and Reduction for Question Answering with Non-Natural Language Construct Log Data](https://aclanthology.org/2025.findings-acl.1214/). In *Findings of the Association for Computational Linguistics: ACL 2025*, pages 23679–23695, Vienna, Austria. Association for Computational Linguistics.
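
To make the abstract's pipeline concrete, below is a minimal sketch, assuming a star-expansion approximation of the hypergraph and networkx's personalized PageRank. The toy `logs`, `query_terms`, and top-k cutoff are illustrative assumptions, not the paper's actual HG-InsightLog implementation or its multi-step personalization mechanism.

```python
# A minimal, illustrative sketch (not the authors' implementation) of the
# pipeline the abstract describes: treat each log entry as a hyperedge over
# its attribute-value nodes, approximate hyperedge ranking with a
# query-personalized PageRank on the star expansion, and emit the top-ranked
# entries in temporal order. The toy logs and query terms are assumptions.
import networkx as nx

logs = [  # (timestamp, {attribute: value}) standing in for parsed log lines
    (1, {"level": "ERROR", "code": "E503", "path": "/var/app/db.sock"}),
    (2, {"level": "INFO",  "code": "E200", "path": "/var/app/web.log"}),
    (3, {"level": "ERROR", "code": "E503", "path": "/var/app/cache"}),
]
query_terms = {"ERROR", "E503"}  # tokens assumed extracted from the question

# Star expansion: each hyperedge (one log entry) becomes an auxiliary node
# linked to its attribute-value nodes, so ordinary PageRank can run on it.
G = nx.Graph()
for ts, attrs in logs:
    hyperedge = ("edge", ts)
    for key, value in attrs.items():
        G.add_edge(hyperedge, ("node", f"{key}={value}"))

# Personalization vector: restart mass on nodes whose value matches the query.
seed = {
    n: 1.0
    for n in G
    if n[0] == "node" and n[1].split("=", 1)[1] in query_terms
}
scores = nx.pagerank(G, alpha=0.85, personalization=seed)

# Keep the top-k hyperedges, then restore temporal order before building the
# reduced context that would be handed to an LLM.
top_edges = sorted(
    (n for n in G if n[0] == "edge"), key=scores.get, reverse=True
)[:2]
kept = {ts for _, ts in top_edges}
for ts, attrs in sorted(logs, key=lambda entry: entry[0]):
    if ts in kept:
        print(ts, " ".join(f"{k}={v}" for k, v in attrs.items()))
```

Run as-is, this keeps the two ERROR/E503 entries and drops the unrelated INFO line, illustrating how query-personalized ranking both prioritizes and reduces the log context before it reaches the model.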