@inproceedings{amplayo-etal-2019-evaluating,
title = "Evaluating Research Novelty Detection: Counterfactual Approaches",
author = "Amplayo, Reinald Kim and
Hwang, Seung-won and
Song, Min",
editor = "Ustalov, Dmitry and
Somasundaran, Swapna and
Jansen, Peter and
Glava{\v{s}}, Goran and
Riedl, Martin and
Surdeanu, Mihai and
Vazirgiannis, Michalis",
booktitle = "Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13)",
month = nov,
year = "2019",
address = "Hong Kong",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-5315",
doi = "10.18653/v1/D19-5315",
pages = "124--133",
    abstract = "In this paper, we explore strategies to evaluate models for the task of research paper novelty detection: Given all papers released on a given date, which of them discuss new ideas and influence future research? We find that novelty is not a singular concept, and thus it inherently lacks ground-truth annotations with cross-annotator agreement, which is a major obstacle to evaluating these models. The test-of-time award is the closest to such an annotation, but it can only be made retrospectively and is extremely scarce. We thus propose to compare and evaluate models using counterfactual simulations. First, we ask models whether they can differentiate papers at time $t$ from counterfactual papers from a future time $t+d$. Second, we ask models whether they can predict the test-of-time award at $t+d$. These proxies can be agreed upon by human annotators and easily augmented with correlated signals, and with them evaluation can be done through four tasks: classification, ranking, correlation, and feature selection. We show that these proxy evaluation methods complement each other in terms of error handling, coverage, interpretability, and scope, and thus together reveal the relative strengths of existing models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amplayo-etal-2019-evaluating">
<titleInfo>
<title>Evaluating Research Novelty Detection: Counterfactual Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Reinald</namePart>
<namePart type="given">Kim</namePart>
<namePart type="family">Amplayo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-won</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Ustalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swapna</namePart>
<namePart type="family">Somasundaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Goran</namePart>
<namePart type="family">Glavaš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Riedl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michalis</namePart>
<namePart type="family">Vazirgiannis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we explore strategies to evaluate models for the task of research paper novelty detection: Given all papers released on a given date, which of them discuss new ideas and influence future research? We find that novelty is not a singular concept, and thus it inherently lacks ground-truth annotations with cross-annotator agreement, which is a major obstacle to evaluating these models. The test-of-time award is the closest to such an annotation, but it can only be made retrospectively and is extremely scarce. We thus propose to compare and evaluate models using counterfactual simulations. First, we ask models whether they can differentiate papers at time t from counterfactual papers from a future time t+d. Second, we ask models whether they can predict the test-of-time award at t+d. These proxies can be agreed upon by human annotators and easily augmented with correlated signals, and with them evaluation can be done through four tasks: classification, ranking, correlation, and feature selection. We show that these proxy evaluation methods complement each other in terms of error handling, coverage, interpretability, and scope, and thus together reveal the relative strengths of existing models.</abstract>
<identifier type="citekey">amplayo-etal-2019-evaluating</identifier>
<identifier type="doi">10.18653/v1/D19-5315</identifier>
<location>
<url>https://aclanthology.org/D19-5315</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>124</start>
<end>133</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Research Novelty Detection: Counterfactual Approaches
%A Amplayo, Reinald Kim
%A Hwang, Seung-won
%A Song, Min
%Y Ustalov, Dmitry
%Y Somasundaran, Swapna
%Y Jansen, Peter
%Y Glavaš, Goran
%Y Riedl, Martin
%Y Surdeanu, Mihai
%Y Vazirgiannis, Michalis
%S Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong
%F amplayo-etal-2019-evaluating
%X In this paper, we explore strategies to evaluate models for the task of research paper novelty detection: Given all papers released on a given date, which of them discuss new ideas and influence future research? We find that novelty is not a singular concept, and thus it inherently lacks ground-truth annotations with cross-annotator agreement, which is a major obstacle to evaluating these models. The test-of-time award is the closest to such an annotation, but it can only be made retrospectively and is extremely scarce. We thus propose to compare and evaluate models using counterfactual simulations. First, we ask models whether they can differentiate papers at time t from counterfactual papers from a future time t+d. Second, we ask models whether they can predict the test-of-time award at t+d. These proxies can be agreed upon by human annotators and easily augmented with correlated signals, and with them evaluation can be done through four tasks: classification, ranking, correlation, and feature selection. We show that these proxy evaluation methods complement each other in terms of error handling, coverage, interpretability, and scope, and thus together reveal the relative strengths of existing models.
%R 10.18653/v1/D19-5315
%U https://aclanthology.org/D19-5315
%U https://doi.org/10.18653/v1/D19-5315
%P 124-133
Markdown (Informal)
[Evaluating Research Novelty Detection: Counterfactual Approaches](https://aclanthology.org/D19-5315) (Amplayo et al., TextGraphs 2019)
ACL
Reinald Kim Amplayo, Seung-won Hwang, and Min Song. 2019. [Evaluating Research Novelty Detection: Counterfactual Approaches](https://aclanthology.org/D19-5315). In Proceedings of the Thirteenth Workshop on Graph-Based Methods for Natural Language Processing (TextGraphs-13), pages 124–133, Hong Kong. Association for Computational Linguistics.