@inproceedings{pinto-etal-2025-exploring,
title = "Exploring Medium-Sized {LLM}s for Knowledge Base Construction",
author = "Pinto, Tom{\'a}s Cerveira Da Cruz and
Oliveira, Hugo Gon{\c{c}}alo and
Fleger, Chris-Bennet",
editor = "Alam, Mehwish and
Tchechmedjiev, Andon and
Gracia, Jorge and
Gromann, Dagmar and
di Buono, Maria Pia and
Monti, Johanna and
Ionov, Maxim",
booktitle = "Proceedings of the 5th Conference on Language, Data and Knowledge",
month = sep,
year = "2025",
address = "Naples, Italy",
publisher = "Unior Press",
url = "https://aclanthology.org/2025.ldk-1.23/",
pages = "221--232",
ISBN = "978-88-6719-333-2",
abstract = "Knowledge base construction (KBC) is one of the great challenges in Natural Language Processing (NLP) and of fundamental importance to the growth of the Semantic Web. Large Language Models (LLMs) may be useful for extracting structured knowledge, including subject-predicate-object triples. We tackle the LM-KBC 2023 Challenge by leveraging LLMs for KBC, utilizing its dataset and benchmarking our results against challenge participants. Prompt engineering and ensemble strategies are tested for object prediction with pretrained LLMs in the 0.5-2B parameter range, which is between the limits of tracks 1 and 2 of the challenge.Selected models are assessed in zero-shot and few-shot learning approaches when predicting the objects of 21 relations. Results demonstrate that instruction-tuned LLMs outperform generative baselines by up to four times, with relation-adapted prompts playing a crucial role in performance. The ensemble approach further enhances triple extraction, with a relation-based selection strategy achieving the highest F1 score. These findings highlight the potential of medium-sized LLMs and prompt engineering methods for efficient KBC."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pinto-etal-2025-exploring">
<titleInfo>
<title>Exploring Medium-Sized LLMs for Knowledge Base Construction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tomás</namePart>
<namePart type="given">Cerveira</namePart>
<namePart type="given">Da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Pinto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="given">Gonçalo</namePart>
<namePart type="family">Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris-Bennet</namePart>
<namePart type="family">Fleger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Conference on Language, Data and Knowledge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehwish</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andon</namePart>
<namePart type="family">Tchechmedjiev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorge</namePart>
<namePart type="family">Gracia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxim</namePart>
<namePart type="family">Ionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Unior Press</publisher>
<place>
<placeTerm type="text">Naples, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-88-6719-333-2</identifier>
</relatedItem>
<abstract>Knowledge base construction (KBC) is one of the great challenges in Natural Language Processing (NLP) and of fundamental importance to the growth of the Semantic Web. Large Language Models (LLMs) may be useful for extracting structured knowledge, including subject-predicate-object triples. We tackle the LM-KBC 2023 Challenge by leveraging LLMs for KBC, utilizing its dataset and benchmarking our results against challenge participants. Prompt engineering and ensemble strategies are tested for object prediction with pretrained LLMs in the 0.5-2B parameter range, which is between the limits of tracks 1 and 2 of the challenge. Selected models are assessed in zero-shot and few-shot learning approaches when predicting the objects of 21 relations. Results demonstrate that instruction-tuned LLMs outperform generative baselines by up to four times, with relation-adapted prompts playing a crucial role in performance. The ensemble approach further enhances triple extraction, with a relation-based selection strategy achieving the highest F1 score. These findings highlight the potential of medium-sized LLMs and prompt engineering methods for efficient KBC.</abstract>
<identifier type="citekey">pinto-etal-2025-exploring</identifier>
<location>
<url>https://aclanthology.org/2025.ldk-1.23/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>221</start>
<end>232</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Medium-Sized LLMs for Knowledge Base Construction
%A Pinto, Tomás Cerveira Da Cruz
%A Oliveira, Hugo Gonçalo
%A Fleger, Chris-Bennet
%Y Alam, Mehwish
%Y Tchechmedjiev, Andon
%Y Gracia, Jorge
%Y Gromann, Dagmar
%Y di Buono, Maria Pia
%Y Monti, Johanna
%Y Ionov, Maxim
%S Proceedings of the 5th Conference on Language, Data and Knowledge
%D 2025
%8 September
%I Unior Press
%C Naples, Italy
%@ 978-88-6719-333-2
%F pinto-etal-2025-exploring
%X Knowledge base construction (KBC) is one of the great challenges in Natural Language Processing (NLP) and of fundamental importance to the growth of the Semantic Web. Large Language Models (LLMs) may be useful for extracting structured knowledge, including subject-predicate-object triples. We tackle the LM-KBC 2023 Challenge by leveraging LLMs for KBC, utilizing its dataset and benchmarking our results against challenge participants. Prompt engineering and ensemble strategies are tested for object prediction with pretrained LLMs in the 0.5-2B parameter range, which is between the limits of tracks 1 and 2 of the challenge. Selected models are assessed in zero-shot and few-shot learning approaches when predicting the objects of 21 relations. Results demonstrate that instruction-tuned LLMs outperform generative baselines by up to four times, with relation-adapted prompts playing a crucial role in performance. The ensemble approach further enhances triple extraction, with a relation-based selection strategy achieving the highest F1 score. These findings highlight the potential of medium-sized LLMs and prompt engineering methods for efficient KBC.
%U https://aclanthology.org/2025.ldk-1.23/
%P 221-232
Markdown (Informal)
[Exploring Medium-Sized LLMs for Knowledge Base Construction](https://aclanthology.org/2025.ldk-1.23/) (Pinto et al., LDK 2025)
ACL
Tomás Cerveira Da Cruz Pinto, Hugo Gonçalo Oliveira, and Chris-Bennet Fleger. 2025. Exploring Medium-Sized LLMs for Knowledge Base Construction. In Proceedings of the 5th Conference on Language, Data and Knowledge, pages 221–232, Naples, Italy. Unior Press.