@inproceedings{widjaja-etal-2022-kgxboard,
title = "{KG}x{B}oard: Explainable and Interactive Leaderboard for Evaluation of Knowledge Graph Completion Models",
author = "Widjaja, Haris and
Gashteovski, Kiril and
Ben Rim, Wiem and
Liu, Pengfei and
Malon, Christopher and
Ruffinelli, Daniel and
Lawrence, Carolin and
Neubig, Graham",
editor = "Che, Wanxiang and
Shutova, Ekaterina",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-demos.34",
doi = "10.18653/v1/2022.emnlp-demos.34",
pages = "338--350",
    abstract = "Knowledge Graphs (KGs) store information in the form of (head, predicate, tail)-triples. To augment KGs with new knowledge, researchers proposed models for KG Completion (KGC) tasks such as link prediction; i.e., answering (h; p; ?) or (?; p; t) queries. Such models are usually evaluated with averaged metrics on a held-out test set. While useful for tracking progress, averaged single-score metrics cannot reveal what exactly a model has learned {---} or failed to learn. To address this issue, we propose KGxBoard: an interactive framework for performing fine-grained evaluation on meaningful subsets of the data, each of which tests individual and interpretable capabilities of a KGC model. In our experiments, we highlight the findings that we discovered with the use of KGxBoard, which would have been impossible to detect with standard averaged single-score metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="widjaja-etal-2022-kgxboard">
<titleInfo>
<title>KGxBoard: Explainable and Interactive Leaderboard for Evaluation of Knowledge Graph Completion Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haris</namePart>
<namePart type="family">Widjaja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiril</namePart>
<namePart type="family">Gashteovski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wiem</namePart>
<namePart type="family">Ben Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pengfei</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Malon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Ruffinelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolin</namePart>
<namePart type="family">Lawrence</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Knowledge Graphs (KGs) store information in the form of (head, predicate, tail)-triples. To augment KGs with new knowledge, researchers proposed models for KG Completion (KGC) tasks such as link prediction; i.e., answering (h; p; ?) or (?; p; t) queries. Such models are usually evaluated with averaged metrics on a held-out test set. While useful for tracking progress, averaged single-score metrics cannot reveal what exactly a model has learned — or failed to learn. To address this issue, we propose KGxBoard: an interactive framework for performing fine-grained evaluation on meaningful subsets of the data, each of which tests individual and interpretable capabilities of a KGC model. In our experiments, we highlight the findings that we discovered with the use of KGxBoard, which would have been impossible to detect with standard averaged single-score metrics.</abstract>
<identifier type="citekey">widjaja-etal-2022-kgxboard</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-demos.34</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-demos.34</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>338</start>
<end>350</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T KGxBoard: Explainable and Interactive Leaderboard for Evaluation of Knowledge Graph Completion Models
%A Widjaja, Haris
%A Gashteovski, Kiril
%A Ben Rim, Wiem
%A Liu, Pengfei
%A Malon, Christopher
%A Ruffinelli, Daniel
%A Lawrence, Carolin
%A Neubig, Graham
%Y Che, Wanxiang
%Y Shutova, Ekaterina
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F widjaja-etal-2022-kgxboard
%X Knowledge Graphs (KGs) store information in the form of (head, predicate, tail)-triples. To augment KGs with new knowledge, researchers proposed models for KG Completion (KGC) tasks such as link prediction; i.e., answering (h; p; ?) or (?; p; t) queries. Such models are usually evaluated with averaged metrics on a held-out test set. While useful for tracking progress, averaged single-score metrics cannot reveal what exactly a model has learned — or failed to learn. To address this issue, we propose KGxBoard: an interactive framework for performing fine-grained evaluation on meaningful subsets of the data, each of which tests individual and interpretable capabilities of a KGC model. In our experiments, we highlight the findings that we discovered with the use of KGxBoard, which would have been impossible to detect with standard averaged single-score metrics.
%R 10.18653/v1/2022.emnlp-demos.34
%U https://aclanthology.org/2022.emnlp-demos.34
%U https://doi.org/10.18653/v1/2022.emnlp-demos.34
%P 338-350
Markdown (Informal)
[KGxBoard: Explainable and Interactive Leaderboard for Evaluation of Knowledge Graph Completion Models](https://aclanthology.org/2022.emnlp-demos.34) (Widjaja et al., EMNLP 2022)
ACL
- Haris Widjaja, Kiril Gashteovski, Wiem Ben Rim, Pengfei Liu, Christopher Malon, Daniel Ruffinelli, Carolin Lawrence, and Graham Neubig. 2022. KGxBoard: Explainable and Interactive Leaderboard for Evaluation of Knowledge Graph Completion Models. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 338–350, Abu Dhabi, UAE. Association for Computational Linguistics.