@inproceedings{saini-etal-2023-90,
title = "90{\%} F1 Score in Relation Triple Extraction: Is it Real?",
author = "Saini, Pratik and
Pal, Samiran and
Nayak, Tapas and
Bhattacharya, Indrajit",
editor = "Hupkes, Dieuwke and
Dankers, Verna and
Batsuren, Khuyagbaatar and
Sinha, Koustuv and
Kazemnejad, Amirhossein and
Christodoulopoulos, Christos and
Cotterell, Ryan and
Bruni, Elia",
booktitle = "Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.genbench-1.1",
doi = "10.18653/v1/2023.genbench-1.1",
pages = "1--11",
abstract = "Extracting relational triples from text is a crucial task for constructing knowledge bases. Recent advancements in joint entity and relation extraction models have demonstrated remarkable F1 scores ({\mbox{$\geq$}} 90{\%}) in accurately extracting relational triples from free text. However, these models have been evaluated under restrictive experimental settings and unrealistic datasets. They overlook sentences with zero triples (zero-cardinality), thereby simplifying the task. In this paper, we present a benchmark study of state-of-the-art joint entity and relation extraction models under a more realistic setting. We include sentences that lack any triples in our experiments, providing a comprehensive evaluation. Our findings reveal a significant decline (approximately 10-15{\%} in one dataset and 6-14{\%} in another dataset) in the models{'} F1 scores within this realistic experimental setup. Furthermore, we propose a two-step modeling approach that utilizes a simple BERT-based classifier. This approach leads to overall performance improvement in these models within the realistic experimental setting.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saini-etal-2023-90">
<titleInfo>
<title>90% F1 Score in Relation Triple Extraction: Is it Real?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pratik</namePart>
<namePart type="family">Saini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samiran</namePart>
<namePart type="family">Pal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tapas</namePart>
<namePart type="family">Nayak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Indrajit</namePart>
<namePart type="family">Bhattacharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khuyagbaatar</namePart>
<namePart type="family">Batsuren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustuv</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirhossein</namePart>
<namePart type="family">Kazemnejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elia</namePart>
<namePart type="family">Bruni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Extracting relational triples from text is a crucial task for constructing knowledge bases. Recent advancements in joint entity and relation extraction models have demonstrated remarkable F1 scores (≥ 90%) in accurately extracting relational triples from free text. However, these models have been evaluated under restrictive experimental settings and unrealistic datasets. They overlook sentences with zero triples (zero-cardinality), thereby simplifying the task. In this paper, we present a benchmark study of state-of-the-art joint entity and relation extraction models under a more realistic setting. We include sentences that lack any triples in our experiments, providing a comprehensive evaluation. Our findings reveal a significant decline (approximately 10-15% in one dataset and 6-14% in another dataset) in the models’ F1 scores within this realistic experimental setup. Furthermore, we propose a two-step modeling approach that utilizes a simple BERT-based classifier. This approach leads to overall performance improvement in these models within the realistic experimental setting.</abstract>
<identifier type="citekey">saini-etal-2023-90</identifier>
<identifier type="doi">10.18653/v1/2023.genbench-1.1</identifier>
<location>
<url>https://aclanthology.org/2023.genbench-1.1</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>1</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 90% F1 Score in Relation Triple Extraction: Is it Real?
%A Saini, Pratik
%A Pal, Samiran
%A Nayak, Tapas
%A Bhattacharya, Indrajit
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Sinha, Koustuv
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Cotterell, Ryan
%Y Bruni, Elia
%S Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F saini-etal-2023-90
%X Extracting relational triples from text is a crucial task for constructing knowledge bases. Recent advancements in joint entity and relation extraction models have demonstrated remarkable F1 scores (≥ 90%) in accurately extracting relational triples from free text. However, these models have been evaluated under restrictive experimental settings and unrealistic datasets. They overlook sentences with zero triples (zero-cardinality), thereby simplifying the task. In this paper, we present a benchmark study of state-of-the-art joint entity and relation extraction models under a more realistic setting. We include sentences that lack any triples in our experiments, providing a comprehensive evaluation. Our findings reveal a significant decline (approximately 10-15% in one dataset and 6-14% in another dataset) in the models’ F1 scores within this realistic experimental setup. Furthermore, we propose a two-step modeling approach that utilizes a simple BERT-based classifier. This approach leads to overall performance improvement in these models within the realistic experimental setting.
%R 10.18653/v1/2023.genbench-1.1
%U https://aclanthology.org/2023.genbench-1.1
%U https://doi.org/10.18653/v1/2023.genbench-1.1
%P 1-11
Markdown (Informal)
[90% F1 Score in Relation Triple Extraction: Is it Real?](https://aclanthology.org/2023.genbench-1.1) (Saini et al., GenBench-WS 2023)
ACL
- Pratik Saini, Samiran Pal, Tapas Nayak, and Indrajit Bhattacharya. 2023. 90% F1 Score in Relation Triple Extraction: Is it Real? In Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP, pages 1–11, Singapore. Association for Computational Linguistics.