@inproceedings{silva-etal-2025-inductive,
title = "Inductive Learning on Heterogeneous Graphs Enhanced by {LLM}s for Software Mention Detection",
author = "Silva, Gabriel and
Rodriges, M{\'a}rio and
Teixeira, Ant{\'o}nio and
Amorim, Marlene",
editor = "Ghosal, Tirthankar and
Mayr, Philipp and
Singh, Amanpreet and
Naik, Aakanksha and
Rehm, Georg and
Freitag, Dayne and
Li, Dan and
Schimmler, Sonja and
De Waard, Anita",
booktitle = "Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sdp-1.16/",
doi = "10.18653/v1/2025.sdp-1.16",
pages = "164--172",
ISBN = "979-8-89176-265-7",
abstract = "This paper explores the synergy between Knowledge Graphs (KGs), Graph Machine Learning (Graph ML), and Large Language Models (LLMs) for multilingual Named Entity Recognition (NER) and Relation Extraction (RE), specifically targeting software mentions within the SOMD 2025 challenge. We propose a methodology where documents are first transformed into heterogeneous KGs enriched with linguistic features (Universal Dependencies) and external knowledge (entity linking). An inductive GraphSAGE model, operating on PyTorch Geometric{'}s `HeteroData{`} structure with dynamically generated multilingual embeddings, performs node classification tasks. For NER, Graph ML identifies candidate entities and types, with an LLM (DeepSeek v3) acting as a validation layer. For RE, Graph ML predicts dependency path convergence points indicative of relations, while the LLM classifies the relation type and direction based on entity context. Our results demonstrate the potential of this hybrid approach, showing significant performance gains post-competition (NER Phase 2 Macro F1 improved to 0.4364 from 0.2953, RE Phase 1 0.3355 Macro F1), which are already described in this paper, and highlighting the benefits of integrating structured graph learning with LLM reasoning for information extraction."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="silva-etal-2025-inductive">
<titleInfo>
<title>Inductive Learning on Heterogeneous Graphs Enhanced by LLMs for Software Mention Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mário</namePart>
<namePart type="family">Rodriges</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">António</namePart>
<namePart type="family">Teixeira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marlene</namePart>
<namePart type="family">Amorim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Rehm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Freitag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Schimmler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-265-7</identifier>
</relatedItem>
<abstract>This paper explores the synergy between Knowledge Graphs (KGs), Graph Machine Learning (Graph ML), and Large Language Models (LLMs) for multilingual Named Entity Recognition (NER) and Relation Extraction (RE), specifically targeting software mentions within the SOMD 2025 challenge. We propose a methodology where documents are first transformed into heterogeneous KGs enriched with linguistic features (Universal Dependencies) and external knowledge (entity linking). An inductive GraphSAGE model, operating on PyTorch Geometric’s ‘HeteroData‘ structure with dynamically generated multilingual embeddings, performs node classification tasks. For NER, Graph ML identifies candidate entities and types, with an LLM (DeepSeek v3) acting as a validation layer. For RE, Graph ML predicts dependency path convergence points indicative of relations, while the LLM classifies the relation type and direction based on entity context. Our results demonstrate the potential of this hybrid approach, showing significant performance gains post-competition (NER Phase 2 Macro F1 improved to 0.4364 from 0.2953, RE Phase 1 0.3355 Macro F1), which are already described in this paper, and highlighting the benefits of integrating structured graph learning with LLM reasoning for information extraction.</abstract>
<identifier type="citekey">silva-etal-2025-inductive</identifier>
<identifier type="doi">10.18653/v1/2025.sdp-1.16</identifier>
<location>
<url>https://aclanthology.org/2025.sdp-1.16/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>164</start>
<end>172</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Inductive Learning on Heterogeneous Graphs Enhanced by LLMs for Software Mention Detection
%A Silva, Gabriel
%A Rodriges, Mário
%A Teixeira, António
%A Amorim, Marlene
%Y Ghosal, Tirthankar
%Y Mayr, Philipp
%Y Singh, Amanpreet
%Y Naik, Aakanksha
%Y Rehm, Georg
%Y Freitag, Dayne
%Y Li, Dan
%Y Schimmler, Sonja
%Y De Waard, Anita
%S Proceedings of the Fifth Workshop on Scholarly Document Processing (SDP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-265-7
%F silva-etal-2025-inductive
%X This paper explores the synergy between Knowledge Graphs (KGs), Graph Machine Learning (Graph ML), and Large Language Models (LLMs) for multilingual Named Entity Recognition (NER) and Relation Extraction (RE), specifically targeting software mentions within the SOMD 2025 challenge. We propose a methodology where documents are first transformed into heterogeneous KGs enriched with linguistic features (Universal Dependencies) and external knowledge (entity linking). An inductive GraphSAGE model, operating on PyTorch Geometric’s ‘HeteroData‘ structure with dynamically generated multilingual embeddings, performs node classification tasks. For NER, Graph ML identifies candidate entities and types, with an LLM (DeepSeek v3) acting as a validation layer. For RE, Graph ML predicts dependency path convergence points indicative of relations, while the LLM classifies the relation type and direction based on entity context. Our results demonstrate the potential of this hybrid approach, showing significant performance gains post-competition (NER Phase 2 Macro F1 improved to 0.4364 from 0.2953, RE Phase 1 0.3355 Macro F1), which are already described in this paper, and highlighting the benefits of integrating structured graph learning with LLM reasoning for information extraction.
%R 10.18653/v1/2025.sdp-1.16
%U https://aclanthology.org/2025.sdp-1.16/
%U https://doi.org/10.18653/v1/2025.sdp-1.16
%P 164-172
Markdown (Informal)
[Inductive Learning on Heterogeneous Graphs Enhanced by LLMs for Software Mention Detection](https://aclanthology.org/2025.sdp-1.16/) (Silva et al., sdp 2025)
ACL