@inproceedings{castro-farias-2026-geracao,
title = "Gera{\c{c}}{\~a}o de consultas {SPARQL} a partir de linguagem natural",
author = "Castro, Heber Gustavo Xavier de and
Farias, Clever Ricardo Guareis de",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.67/",
pages = "676--686",
ISBN = "979-8-89176-387-6",
abstract = "The Semantic Web aims to make web data understandable not only to humans but also to machines, enabling more efficient data integration, sharing, and reuse. Linked Open Data (LOD) initiatives have supported this vision by promoting the publication of semantically annotated and interconnected data. However, querying LOD repositories typically requires knowledge of SPARQL, a complex query language that limits access for non-expert users. Although several approaches have been proposed to automatically generate SPARQL queries from natural-language questions, most are designed for English and are tightly coupled to specific domains, which hinders reuse. This article presents a generic, domain-independent approach for generating SPARQL queries from questions written in Portuguese. The proposed method uses reference questions, parameterized query templates, and a synonym dictionary enriched by lexical resources and similarity metrics. The implementation is supported by the Natural2SPARQL tool, and the approach is validated through a case study in the financial domain using real data from the Brazilian stock exchange (B3). The results indicate that the method enables flexible and semantically accurate SPARQL query generation from natural-language input. Unlike learning-based approaches, our method avoids retraining and achieves up to 93.3{\%} end-to-end success in controlled settings, demonstrating robustness and low adaptation cost."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="castro-farias-2026-geracao">
<titleInfo>
<title>Geração de consultas SPARQL a partir de linguagem natural</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heber</namePart>
<namePart type="given">Gustavo</namePart>
<namePart type="given">Xavier</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Castro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clever</namePart>
<namePart type="given">Ricardo</namePart>
<namePart type="given">Guareis</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Farias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>The Semantic Web aims to make web data understandable not only to humans but also to machines, enabling more efficient data integration, sharing, and reuse. Linked Open Data (LOD) initiatives have supported this vision by promoting the publication of semantically annotated and interconnected data. However, querying LOD repositories typically requires knowledge of SPARQL, a complex query language that limits access for non-expert users. Although several approaches have been proposed to automatically generate SPARQL queries from natural-language questions, most are designed for English and are tightly coupled to specific domains, which hinders reuse. This article presents a generic, domain-independent approach for generating SPARQL queries from questions written in Portuguese. The proposed method uses reference questions, parameterized query templates, and a synonym dictionary enriched by lexical resources and similarity metrics. The implementation is supported by the Natural2SPARQL tool, and the approach is validated through a case study in the financial domain using real data from the Brazilian stock exchange (B3). The results indicate that the method enables flexible and semantically accurate SPARQL query generation from natural-language input. Unlike learning-based approaches, our method avoids retraining and achieves up to 93.3% end-to-end success in controlled settings, demonstrating robustness and low adaptation cost.</abstract>
<identifier type="citekey">castro-farias-2026-geracao</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.67/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>676</start>
<end>686</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Geração de consultas SPARQL a partir de linguagem natural
%A Castro, Heber Gustavo Xavier de
%A Farias, Clever Ricardo Guareis de
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F castro-farias-2026-geracao
%X The Semantic Web aims to make web data understandable not only to humans but also to machines, enabling more efficient data integration, sharing, and reuse. Linked Open Data (LOD) initiatives have supported this vision by promoting the publication of semantically annotated and interconnected data. However, querying LOD repositories typically requires knowledge of SPARQL, a complex query language that limits access for non-expert users. Although several approaches have been proposed to automatically generate SPARQL queries from natural-language questions, most are designed for English and are tightly coupled to specific domains, which hinders reuse. This article presents a generic, domain-independent approach for generating SPARQL queries from questions written in Portuguese. The proposed method uses reference questions, parameterized query templates, and a synonym dictionary enriched by lexical resources and similarity metrics. The implementation is supported by the Natural2SPARQL tool, and the approach is validated through a case study in the financial domain using real data from the Brazilian stock exchange (B3). The results indicate that the method enables flexible and semantically accurate SPARQL query generation from natural-language input. Unlike learning-based approaches, our method avoids retraining and achieves up to 93.3% end-to-end success in controlled settings, demonstrating robustness and low adaptation cost.
%U https://aclanthology.org/2026.propor-1.67/
%P 676-686
Markdown (Informal)
[Geração de consultas SPARQL a partir de linguagem natural](https://aclanthology.org/2026.propor-1.67/) (Castro & Farias, PROPOR 2026)
ACL
- Heber Gustavo Xavier de Castro and Clever Ricardo Guareis de Farias. 2026. Geração de consultas SPARQL a partir de linguagem natural. In Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1, pages 676–686, Salvador, Brazil. Association for Computational Linguistics.