@inproceedings{vuth-etal-2024-kgast,
title = "{KGAST}: From Knowledge Graphs to Annotated Synthetic Texts",
author = "Vuth, Nakanyseth and
S{\'e}rasset, Gilles and
Schwab, Didier",
editor = "Biswas, Russa and
Kaffee, Lucie-Aim{\'e}e and
Agarwal, Oshin and
Minervini, Pasquale and
Singh, Sameer and
de Melo, Gerard",
booktitle = "Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.kallm-1.5",
doi = "10.18653/v1/2024.kallm-1.5",
pages = "43--55",
abstract = "In recent years, the use of synthetic data, either as a complement or a substitute for original data, has emerged as a solution to challenges such as data scarcity and security risks. This paper is an initial attempt to automatically generate such data for Information Extraction tasks. We accomplished this by developing a novel synthetic data generation framework called KGAST, which leverages Knowledge Graphs and Large Language Models. In our preliminary study, we conducted simple experiments to generate synthetic versions of two datasets{---}a French security defense dataset and an English general domain dataset, after which we evaluated them both intrinsically and extrinsically. The results indicated that synthetic data can effectively complement original data, improving the performance of models on classes with limited training samples. This highlights KGAST{'}s potential as a tool for generating synthetic data for Information Extraction tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vuth-etal-2024-kgast">
<titleInfo>
<title>KGAST: From Knowledge Graphs to Annotated Synthetic Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nakanyseth</namePart>
<namePart type="family">Vuth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gilles</namePart>
<namePart type="family">Sérasset</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Didier</namePart>
<namePart type="family">Schwab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Russa</namePart>
<namePart type="family">Biswas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucie-Aimée</namePart>
<namePart type="family">Kaffee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oshin</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pasquale</namePart>
<namePart type="family">Minervini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">de Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, the use of synthetic data, either as a complement or a substitute for original data, has emerged as a solution to challenges such as data scarcity and security risks. This paper is an initial attempt to automatically generate such data for Information Extraction tasks. We accomplished this by developing a novel synthetic data generation framework called KGAST, which leverages Knowledge Graphs and Large Language Models. In our preliminary study, we conducted simple experiments to generate synthetic versions of two datasets—a French security defense dataset and an English general domain dataset, after which we evaluated them both intrinsically and extrinsically. The results indicated that synthetic data can effectively complement original data, improving the performance of models on classes with limited training samples. This highlights KGAST’s potential as a tool for generating synthetic data for Information Extraction tasks.</abstract>
<identifier type="citekey">vuth-etal-2024-kgast</identifier>
<identifier type="doi">10.18653/v1/2024.kallm-1.5</identifier>
<location>
<url>https://aclanthology.org/2024.kallm-1.5</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>43</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T KGAST: From Knowledge Graphs to Annotated Synthetic Texts
%A Vuth, Nakanyseth
%A Sérasset, Gilles
%A Schwab, Didier
%Y Biswas, Russa
%Y Kaffee, Lucie-Aimée
%Y Agarwal, Oshin
%Y Minervini, Pasquale
%Y Singh, Sameer
%Y de Melo, Gerard
%S Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F vuth-etal-2024-kgast
%X In recent years, the use of synthetic data, either as a complement or a substitute for original data, has emerged as a solution to challenges such as data scarcity and security risks. This paper is an initial attempt to automatically generate such data for Information Extraction tasks. We accomplished this by developing a novel synthetic data generation framework called KGAST, which leverages Knowledge Graphs and Large Language Models. In our preliminary study, we conducted simple experiments to generate synthetic versions of two datasets—a French security defense dataset and an English general domain dataset, after which we evaluated them both intrinsically and extrinsically. The results indicated that synthetic data can effectively complement original data, improving the performance of models on classes with limited training samples. This highlights KGAST’s potential as a tool for generating synthetic data for Information Extraction tasks.
%R 10.18653/v1/2024.kallm-1.5
%U https://aclanthology.org/2024.kallm-1.5
%U https://doi.org/10.18653/v1/2024.kallm-1.5
%P 43-55
Markdown (Informal)
[KGAST: From Knowledge Graphs to Annotated Synthetic Texts](https://aclanthology.org/2024.kallm-1.5) (Vuth et al., KaLLM-WS 2024)
ACL
- Nakanyseth Vuth, Gilles Sérasset, and Didier Schwab. 2024. KGAST: From Knowledge Graphs to Annotated Synthetic Texts. In Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024), pages 43–55, Bangkok, Thailand. Association for Computational Linguistics.