@inproceedings{ozsoy-etal-2025-text2cypher,
title = "{T}ext2{C}ypher: Bridging Natural Language and Graph Databases",
author = "Ozsoy, Makbule Gulcin and
Messallem, Leila and
Besga, Jon and
Minneci, Gianandrea",
editor = "Gesese, Genet Asefa and
Sack, Harald and
Paulheim, Heiko and
Mero{\~n}o-Pe{\~n}uela, Albert and
Chen, Lihu",
booktitle = "Proceedings of the Workshop on Generative AI and Knowledge Graphs (GenAIK)",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2025.genaik-1.11/",
pages = "100--108",
abstract = "Knowledge graphs use nodes, relationships, and properties to represent arbitrarily complex data. When stored in a graph database, the Cypher query language enables efficient modeling and querying of knowledge graphs. However, using Cypher requires specialized knowledge, which can present a challenge for non-expert users. Our work Text2Cypher aims to bridge this gap by translating natural language queries into Cypher query language and extending the utility of knowledge graphs to non-technical expert users. While large language models (LLMs) can be used for this purpose, they often struggle to capture complex nuances, resulting in incomplete or incorrect outputs. Fine-tuning LLMs on domain-specific datasets has proven to be a more promising approach, but the limited availability of high-quality, publicly available Text2Cypher datasets makes this challenging. In this work, we show how we combined, cleaned and organized several publicly available datasets into a total of 44,387 instances, enabling effective fine-tuning and evaluation. Models fine-tuned on this dataset showed significant performance gains, with improvements in Google-BLEU and Exact Match scores over baseline models, highlighting the importance of high-quality datasets and fine-tuning in improving Text2Cypher performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ozsoy-etal-2025-text2cypher">
<titleInfo>
<title>Text2Cypher: Bridging Natural Language and Graph Databases</title>
</titleInfo>
<name type="personal">
<namePart type="given">Makbule</namePart>
<namePart type="given">Gulcin</namePart>
<namePart type="family">Ozsoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leila</namePart>
<namePart type="family">Messallem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jon</namePart>
<namePart type="family">Besga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gianandrea</namePart>
<namePart type="family">Minneci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Generative AI and Knowledge Graphs (GenAIK)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Genet</namePart>
<namePart type="given">Asefa</namePart>
<namePart type="family">Gesese</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Sack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heiko</namePart>
<namePart type="family">Paulheim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Meroño-Peñuela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lihu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Knowledge graphs use nodes, relationships, and properties to represent arbitrarily complex data. When stored in a graph database, the Cypher query language enables efficient modeling and querying of knowledge graphs. However, using Cypher requires specialized knowledge, which can present a challenge for non-expert users. Our work Text2Cypher aims to bridge this gap by translating natural language queries into Cypher query language and extending the utility of knowledge graphs to non-technical expert users. While large language models (LLMs) can be used for this purpose, they often struggle to capture complex nuances, resulting in incomplete or incorrect outputs. Fine-tuning LLMs on domain-specific datasets has proven to be a more promising approach, but the limited availability of high-quality, publicly available Text2Cypher datasets makes this challenging. In this work, we show how we combined, cleaned and organized several publicly available datasets into a total of 44,387 instances, enabling effective fine-tuning and evaluation. Models fine-tuned on this dataset showed significant performance gains, with improvements in Google-BLEU and Exact Match scores over baseline models, highlighting the importance of high-quality datasets and fine-tuning in improving Text2Cypher performance.</abstract>
<identifier type="citekey">ozsoy-etal-2025-text2cypher</identifier>
<location>
<url>https://aclanthology.org/2025.genaik-1.11/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>100</start>
<end>108</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Text2Cypher: Bridging Natural Language and Graph Databases
%A Ozsoy, Makbule Gulcin
%A Messallem, Leila
%A Besga, Jon
%A Minneci, Gianandrea
%Y Gesese, Genet Asefa
%Y Sack, Harald
%Y Paulheim, Heiko
%Y Meroño-Peñuela, Albert
%Y Chen, Lihu
%S Proceedings of the Workshop on Generative AI and Knowledge Graphs (GenAIK)
%D 2025
%8 January
%I International Committee on Computational Linguistics
%C Abu Dhabi, UAE
%F ozsoy-etal-2025-text2cypher
%X Knowledge graphs use nodes, relationships, and properties to represent arbitrarily complex data. When stored in a graph database, the Cypher query language enables efficient modeling and querying of knowledge graphs. However, using Cypher requires specialized knowledge, which can present a challenge for non-expert users. Our work Text2Cypher aims to bridge this gap by translating natural language queries into Cypher query language and extending the utility of knowledge graphs to non-technical expert users. While large language models (LLMs) can be used for this purpose, they often struggle to capture complex nuances, resulting in incomplete or incorrect outputs. Fine-tuning LLMs on domain-specific datasets has proven to be a more promising approach, but the limited availability of high-quality, publicly available Text2Cypher datasets makes this challenging. In this work, we show how we combined, cleaned and organized several publicly available datasets into a total of 44,387 instances, enabling effective fine-tuning and evaluation. Models fine-tuned on this dataset showed significant performance gains, with improvements in Google-BLEU and Exact Match scores over baseline models, highlighting the importance of high-quality datasets and fine-tuning in improving Text2Cypher performance.
%U https://aclanthology.org/2025.genaik-1.11/
%P 100-108
Markdown (Informal)
[Text2Cypher: Bridging Natural Language and Graph Databases](https://aclanthology.org/2025.genaik-1.11/) (Ozsoy et al., GenAIK 2025)
ACL
- Makbule Gulcin Ozsoy, Leila Messallem, Jon Besga, and Gianandrea Minneci. 2025. Text2Cypher: Bridging Natural Language and Graph Databases. In Proceedings of the Workshop on Generative AI and Knowledge Graphs (GenAIK), pages 100–108, Abu Dhabi, UAE. International Committee on Computational Linguistics.