@inproceedings{bhardwaj-etal-2019-carb,
title = "{C}a{RB}: A Crowdsourced Benchmark for Open {IE}",
author = "Bhardwaj, Sangnie and
Aggarwal, Samarth and
{Mausam}",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1651",
doi = "10.18653/v1/D19-1651",
pages = "6262--6267",
abstract = "Open Information Extraction (Open IE) systems have been traditionally evaluated via manual annotation. Recently, an automated evaluator with a benchmark dataset (OIE2016) was released {--} it scores Open IE systems automatically by matching system predictions with predictions in the benchmark dataset. Unfortunately, our analysis reveals that its data is rather noisy, and the tuple matching in the evaluator has issues, making the results of automated comparisons less trustworthy. We contribute CaRB, an improved dataset and framework for testing Open IE systems. To the best of our knowledge, CaRB is the first crowdsourced Open IE dataset and it also makes substantive changes in the matching code and metrics. NLP experts annotate CaRB{'}s dataset to be more accurate than OIE2016. Moreover, we find that on one pair of Open IE systems, CaRB framework provides contradictory results to OIE2016. Human assessment verifies that CaRB{'}s ranking of the two systems is the accurate ranking. We release the CaRB framework along with its crowdsourced dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bhardwaj-etal-2019-carb">
<titleInfo>
<title>CaRB: A Crowdsourced Benchmark for Open IE</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sangnie</namePart>
<namePart type="family">Bhardwaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samarth</namePart>
<namePart type="family">Aggarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name>
<namePart>Mausam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open Information Extraction (Open IE) systems have been traditionally evaluated via manual annotation. Recently, an automated evaluator with a benchmark dataset (OIE2016) was released – it scores Open IE systems automatically by matching system predictions with predictions in the benchmark dataset. Unfortunately, our analysis reveals that its data is rather noisy, and the tuple matching in the evaluator has issues, making the results of automated comparisons less trustworthy. We contribute CaRB, an improved dataset and framework for testing Open IE systems. To the best of our knowledge, CaRB is the first crowdsourced Open IE dataset and it also makes substantive changes in the matching code and metrics. NLP experts annotate CaRB’s dataset to be more accurate than OIE2016. Moreover, we find that on one pair of Open IE systems, CaRB framework provides contradictory results to OIE2016. Human assessment verifies that CaRB’s ranking of the two systems is the accurate ranking. We release the CaRB framework along with its crowdsourced dataset.</abstract>
<identifier type="citekey">bhardwaj-etal-2019-carb</identifier>
<identifier type="doi">10.18653/v1/D19-1651</identifier>
<location>
<url>https://aclanthology.org/D19-1651</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>6262</start>
<end>6267</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CaRB: A Crowdsourced Benchmark for Open IE
%A Bhardwaj, Sangnie
%A Aggarwal, Samarth
%A Mausam
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F bhardwaj-etal-2019-carb
%X Open Information Extraction (Open IE) systems have been traditionally evaluated via manual annotation. Recently, an automated evaluator with a benchmark dataset (OIE2016) was released – it scores Open IE systems automatically by matching system predictions with predictions in the benchmark dataset. Unfortunately, our analysis reveals that its data is rather noisy, and the tuple matching in the evaluator has issues, making the results of automated comparisons less trustworthy. We contribute CaRB, an improved dataset and framework for testing Open IE systems. To the best of our knowledge, CaRB is the first crowdsourced Open IE dataset and it also makes substantive changes in the matching code and metrics. NLP experts annotate CaRB’s dataset to be more accurate than OIE2016. Moreover, we find that on one pair of Open IE systems, CaRB framework provides contradictory results to OIE2016. Human assessment verifies that CaRB’s ranking of the two systems is the accurate ranking. We release the CaRB framework along with its crowdsourced dataset.
%R 10.18653/v1/D19-1651
%U https://aclanthology.org/D19-1651
%U https://doi.org/10.18653/v1/D19-1651
%P 6262-6267
Markdown (Informal)
[CaRB: A Crowdsourced Benchmark for Open IE](https://aclanthology.org/D19-1651) (Bhardwaj et al., EMNLP-IJCNLP 2019)
ACL
Sangnie Bhardwaj, Samarth Aggarwal, and Mausam. 2019. CaRB: A Crowdsourced Benchmark for Open IE. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 6262–6267, Hong Kong, China. Association for Computational Linguistics.
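
The abstract above describes automatic Open IE evaluation as matching system-predicted tuples against tuples in a benchmark dataset and scoring the match. As a rough illustration of that general idea only, the sketch below shows tuple matching with precision and recall in Python; the token-overlap scoring, the 0.5 matching threshold, and all function names are assumptions made for this example and are not taken from CaRB's released code or metrics.

```python
# Illustrative sketch only: scoring Open IE output by matching predicted
# (arg1, relation, arg2) tuples against benchmark tuples. The matching
# scheme and thresholds here are simplifying assumptions, not CaRB's
# actual matching code or metrics.

from typing import List, Tuple

Extraction = Tuple[str, str, str]  # (argument 1, relation, argument 2)


def token_overlap(pred: str, gold: str) -> float:
    """Fraction of gold tokens that also appear in the predicted slot."""
    gold_tokens = gold.lower().split()
    pred_tokens = set(pred.lower().split())
    if not gold_tokens:
        return 0.0
    return sum(t in pred_tokens for t in gold_tokens) / len(gold_tokens)


def match_score(pred: Extraction, gold: Extraction) -> float:
    """Average per-slot token overlap between a predicted and a gold tuple."""
    return sum(token_overlap(p, g) for p, g in zip(pred, gold)) / 3


def precision_recall(preds: List[Extraction],
                     golds: List[Extraction],
                     threshold: float = 0.5) -> Tuple[float, float]:
    """Greedily match each prediction to an unused gold tuple, then report
    the fraction of matched predictions (precision) and gold tuples (recall)."""
    matched_gold = set()
    matched_pred = 0
    for pred in preds:
        best, best_score = None, 0.0
        for i, gold in enumerate(golds):
            if i in matched_gold:
                continue
            score = match_score(pred, gold)
            if score > best_score:
                best, best_score = i, score
        if best is not None and best_score >= threshold:
            matched_gold.add(best)
            matched_pred += 1
    precision = matched_pred / len(preds) if preds else 0.0
    recall = len(matched_gold) / len(golds) if golds else 0.0
    return precision, recall


if __name__ == "__main__":
    gold = [("CaRB", "is", "a crowdsourced benchmark for Open IE")]
    pred = [("CaRB", "is", "a crowdsourced benchmark")]
    print(precision_recall(pred, gold))  # -> (1.0, 1.0) under these assumptions
```

For the actual dataset, matching code, and metrics, see the CaRB release referenced by the paper rather than this sketch.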