BibTeX

@inproceedings{lignos-kamyab-2020-build,
    title = "If You Build Your Own {NER} Scorer, Non-replicable Results Will Come",
    author = "Lignos, Constantine  and
      Kamyab, Marjan",
    editor = "Rogers, Anna  and
      Sedoc, Jo{\~a}o  and
      Rumshisky, Anna",
    booktitle = "Proceedings of the First Workshop on Insights from Negative Results in NLP",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.insights-1.15",
    doi = "10.18653/v1/2020.insights-1.15",
    pages = "94--99",
    abstract = "We attempt to replicate a named entity recognition (NER) model implemented in a popular toolkit and discover that a critical barrier to doing so is the inconsistent evaluation of improper label sequences. We define these sequences and examine how two scorers differ in their handling of them, finding that one approach produces F1 scores approximately 0.5 points higher on the CoNLL 2003 English development and test sets. We propose best practices to increase the replicability of NER evaluations by increasing transparency regarding the handling of improper label sequences.",
}
MODS XML

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lignos-kamyab-2020-build">
    <titleInfo>
      <title>If You Build Your Own NER Scorer, Non-replicable Results Will Come</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Constantine</namePart>
      <namePart type="family">Lignos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marjan</namePart>
      <namePart type="family">Kamyab</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Insights from Negative Results in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">João</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rumshisky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We attempt to replicate a named entity recognition (NER) model implemented in a popular toolkit and discover that a critical barrier to doing so is the inconsistent evaluation of improper label sequences. We define these sequences and examine how two scorers differ in their handling of them, finding that one approach produces F1 scores approximately 0.5 points higher on the CoNLL 2003 English development and test sets. We propose best practices to increase the replicability of NER evaluations by increasing transparency regarding the handling of improper label sequences.</abstract>
    <identifier type="citekey">lignos-kamyab-2020-build</identifier>
    <identifier type="doi">10.18653/v1/2020.insights-1.15</identifier>
    <location>
      <url>https://aclanthology.org/2020.insights-1.15</url>
    </location>
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>94</start>
        <end>99</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote

%0 Conference Proceedings
%T If You Build Your Own NER Scorer, Non-replicable Results Will Come
%A Lignos, Constantine
%A Kamyab, Marjan
%Y Rogers, Anna
%Y Sedoc, João
%Y Rumshisky, Anna
%S Proceedings of the First Workshop on Insights from Negative Results in NLP
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lignos-kamyab-2020-build
%X We attempt to replicate a named entity recognition (NER) model implemented in a popular toolkit and discover that a critical barrier to doing so is the inconsistent evaluation of improper label sequences. We define these sequences and examine how two scorers differ in their handling of them, finding that one approach produces F1 scores approximately 0.5 points higher on the CoNLL 2003 English development and test sets. We propose best practices to increase the replicability of NER evaluations by increasing transparency regarding the handling of improper label sequences.
%R 10.18653/v1/2020.insights-1.15
%U https://aclanthology.org/2020.insights-1.15
%U https://doi.org/10.18653/v1/2020.insights-1.15
%P 94-99
Markdown (Informal)
[If You Build Your Own NER Scorer, Non-replicable Results Will Come](https://aclanthology.org/2020.insights-1.15) (Lignos & Kamyab, insights 2020)
ACL

Constantine Lignos and Marjan Kamyab. 2020. [If You Build Your Own NER Scorer, Non-replicable Results Will Come](https://aclanthology.org/2020.insights-1.15). In *Proceedings of the First Workshop on Insights from Negative Results in NLP*, pages 94–99, Online. Association for Computational Linguistics.