@inproceedings{caldas-etal-2026-specializing,
title = "Specializing a Small Language Model for Closed-Domain {P}ortuguese {RAG} using Knowledge Graph Supervision",
author = "Caldas, Josu{\'e} and
Souza, Elvis de and
Silva, Patr{\'i}cia and
Pacheco, Marco",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.28/",
pages = "281--290",
    isbn = "979-8-89176-387-6",
    abstract = "Fine-tuned small language models (SLMs) have emerged as effective alternatives for closed-domain tasks, where large language models (LLMs) often lack sufficient parametric knowledge. This study presents a methodology for adapting a small language model to a closed-domain question answering (Q\&A) task. For each question, the model is trained to output an answer based on the most relevant context passage, among ten provided candidates, thus reproducing the logic of a Retrieval-Augmented Generation (RAG) framework. The fine-tuning data were derived from PetroKGraph, an existing knowledge graph built from Portuguese-language resources in the oil and gas (O\&G) domain. Experimental results show that the fine-tuned model achieves a 20 percentage points accuracy improvement over the base model on closed-domain questions. It also surpasses GPT-4o and GPT-4o Mini by 12 and 25 points, respectively. Moreover, its performance on general-domain tasks remains comparable to that of the base model, indicating that the specialized model effectively learned domain-specific knowledge while maintaining general reasoning capabilities."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="caldas-etal-2026-specializing">
<titleInfo>
<title>Specializing a Small Language Model for Closed-Domain Portuguese RAG using Knowledge Graph Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Josué</namePart>
<namePart type="family">Caldas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elvis</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrícia</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Pacheco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
  <abstract>Fine-tuned small language models (SLMs) have emerged as effective alternatives for closed-domain tasks, where large language models (LLMs) often lack sufficient parametric knowledge. This study presents a methodology for adapting a small language model to a closed-domain question answering (Q&amp;A) task. For each question, the model is trained to output an answer based on the most relevant context passage, among ten provided candidates, thus reproducing the logic of a Retrieval-Augmented Generation (RAG) framework. The fine-tuning data were derived from PetroKGraph, an existing knowledge graph built from Portuguese-language resources in the oil and gas (O&amp;G) domain. Experimental results show that the fine-tuned model achieves a 20 percentage points accuracy improvement over the base model on closed-domain questions. It also surpasses GPT-4o and GPT-4o Mini by 12 and 25 points, respectively. Moreover, its performance on general-domain tasks remains comparable to that of the base model, indicating that the specialized model effectively learned domain-specific knowledge while maintaining general reasoning capabilities.</abstract>
<identifier type="citekey">caldas-etal-2026-specializing</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.28/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>281</start>
<end>290</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Specializing a Small Language Model for Closed-Domain Portuguese RAG using Knowledge Graph Supervision
%A Caldas, Josué
%A Souza, Elvis de
%A Silva, Patrícia
%A Pacheco, Marco
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F caldas-etal-2026-specializing
%X Fine-tuned small language models (SLMs) have emerged as effective alternatives for closed-domain tasks, where large language models (LLMs) often lack sufficient parametric knowledge. This study presents a methodology for adapting a small language model to a closed-domain question answering (Q&A) task. For each question, the model is trained to output an answer based on the most relevant context passage, among ten provided candidates, thus reproducing the logic of a Retrieval-Augmented Generation (RAG) framework. The fine-tuning data were derived from PetroKGraph, an existing knowledge graph built from Portuguese-language resources in the oil and gas (O&G) domain. Experimental results show that the fine-tuned model achieves a 20 percentage points accuracy improvement over the base model on closed-domain questions. It also surpasses GPT-4o and GPT-4o Mini by 12 and 25 points, respectively. Moreover, its performance on general-domain tasks remains comparable to that of the base model, indicating that the specialized model effectively learned domain-specific knowledge while maintaining general reasoning capabilities.
%U https://aclanthology.org/2026.propor-1.28/
%P 281-290
Markdown (Informal)
[Specializing a Small Language Model for Closed-Domain Portuguese RAG using Knowledge Graph Supervision](https://aclanthology.org/2026.propor-1.28/) (Caldas et al., PROPOR 2026)
ACL