@inproceedings{ferreira-etal-2024-towards,
title = "Towards Automated Evaluation of Knowledge Encoded in Large Language Models",
author = "Ferreira, Bruno Carlos Lu{\'\i}s and
Silva, Catarina and
Gon{\c{c}}alo Oliveira, Hugo",
editor = "S{\'e}rasset, Gilles and
Gon{\c{c}}alo Oliveira, Hugo and
Oleskeviciene, Giedre Valunaite",
booktitle = "Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.dlnld-1.7",
pages = "76--85",
abstract = "Large Language Models (LLMs) have a significant user base and are gaining increasing interest and impact across various domains. Given their expanding influence, it is crucial to implement appropriate guardrails or controls to ensure ethical and responsible use. In this paper, we propose to automate the evaluation of the knowledge stored in LLMs. This is achieved by generating datasets tailored for this specific purpose, in any selected domain. Our approach consists of four major steps: (i) extraction of relevant entities; (ii) gathering of domain properties; (iii) dataset generation; and (iv) model evaluation. In order to materialize this vision, tools and resources were experimented for entity linking, knowledge acquisition, classification and prompt generation, yielding valuable insights and lessons. The generation of datasets for domain specific model evaluation has successfully proved that the approach can be a future tool for evaluating and moving LLMs {``}black-boxes{''} to human-interpretable knowledge bases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ferreira-etal-2024-towards">
<titleInfo>
<title>Towards Automated Evaluation of Knowledge Encoded in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bruno</namePart>
<namePart type="given">Carlos</namePart>
<namePart type="given">Luís</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catarina</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Gonçalo Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gilles</namePart>
<namePart type="family">Sérasset</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Gonçalo Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giedre</namePart>
<namePart type="given">Valunaite</namePart>
<namePart type="family">Oleskeviciene</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) have a significant user base and are gaining increasing interest and impact across various domains. Given their expanding influence, it is crucial to implement appropriate guardrails or controls to ensure ethical and responsible use. In this paper, we propose to automate the evaluation of the knowledge stored in LLMs. This is achieved by generating datasets tailored for this specific purpose, in any selected domain. Our approach consists of four major steps: (i) extraction of relevant entities; (ii) gathering of domain properties; (iii) dataset generation; and (iv) model evaluation. In order to materialize this vision, tools and resources were experimented for entity linking, knowledge acquisition, classification and prompt generation, yielding valuable insights and lessons. The generation of datasets for domain specific model evaluation has successfully proved that the approach can be a future tool for evaluating and moving LLMs “black-boxes” to human-interpretable knowledge bases.</abstract>
<identifier type="citekey">ferreira-etal-2024-towards</identifier>
<location>
<url>https://aclanthology.org/2024.dlnld-1.7</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>76</start>
<end>85</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Automated Evaluation of Knowledge Encoded in Large Language Models
%A Ferreira, Bruno Carlos Luís
%A Silva, Catarina
%A Gonçalo Oliveira, Hugo
%Y Sérasset, Gilles
%Y Gonçalo Oliveira, Hugo
%Y Oleskeviciene, Giedre Valunaite
%S Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ferreira-etal-2024-towards
%X Large Language Models (LLMs) have a significant user base and are gaining increasing interest and impact across various domains. Given their expanding influence, it is crucial to implement appropriate guardrails or controls to ensure ethical and responsible use. In this paper, we propose to automate the evaluation of the knowledge stored in LLMs. This is achieved by generating datasets tailored for this specific purpose, in any selected domain. Our approach consists of four major steps: (i) extraction of relevant entities; (ii) gathering of domain properties; (iii) dataset generation; and (iv) model evaluation. In order to materialize this vision, tools and resources were experimented for entity linking, knowledge acquisition, classification and prompt generation, yielding valuable insights and lessons. The generation of datasets for domain specific model evaluation has successfully proved that the approach can be a future tool for evaluating and moving LLMs “black-boxes” to human-interpretable knowledge bases.
%U https://aclanthology.org/2024.dlnld-1.7
%P 76-85
Markdown (Informal)
[Towards Automated Evaluation of Knowledge Encoded in Large Language Models](https://aclanthology.org/2024.dlnld-1.7) (Ferreira et al., DLnLD-WS 2024)
ACL