@inproceedings{tu-lignos-2021-tmr,
title = "{TMR}: Evaluating {NER} Recall on Tough Mentions",
author = "Tu, Jingxuan and
Lignos, Constantine",
editor = "Sorodoc, Ionut-Teodor and
Sushil, Madhumita and
Takmaz, Ece and
Agirre, Eneko",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-srw.21/",
doi = "10.18653/v1/2021.eacl-srw.21",
pages = "155--163",
abstract = "We propose the Tough Mentions Recall (TMR) metrics to supplement traditional named entity recognition (NER) evaluation by examining recall on specific subsets of {\textquotedblright}tough{\textquotedblright} mentions: unseen mentions, those whose tokens or token/type combination were not observed in training, and type-confusable mentions, token sequences with multiple entity types in the test data. We demonstrate the usefulness of these metrics by evaluating corpora of English, Spanish, and Dutch using five recent neural architectures. We identify subtle differences between the performance of BERT and Flair on two English NER corpora and identify a weak spot in the performance of current models in Spanish. We conclude that the TMR metrics enable differentiation between otherwise similar-scoring systems and identification of patterns in performance that would go unnoticed from overall precision, recall, and F1."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tu-lignos-2021-tmr">
<titleInfo>
<title>TMR: Evaluating NER Recall on Tough Mentions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jingxuan</namePart>
<namePart type="family">Tu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ionut-Teodor</namePart>
<namePart type="family">Sorodoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Madhumita</namePart>
<namePart type="family">Sushil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ece</namePart>
<namePart type="family">Takmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eneko</namePart>
<namePart type="family">Agirre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose the Tough Mentions Recall (TMR) metrics to supplement traditional named entity recognition (NER) evaluation by examining recall on specific subsets of “tough” mentions: unseen mentions, those whose tokens or token/type combination were not observed in training, and type-confusable mentions, token sequences with multiple entity types in the test data. We demonstrate the usefulness of these metrics by evaluating corpora of English, Spanish, and Dutch using five recent neural architectures. We identify subtle differences between the performance of BERT and Flair on two English NER corpora and identify a weak spot in the performance of current models in Spanish. We conclude that the TMR metrics enable differentiation between otherwise similar-scoring systems and identification of patterns in performance that would go unnoticed from overall precision, recall, and F1.</abstract>
<identifier type="citekey">tu-lignos-2021-tmr</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-srw.21</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-srw.21/</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>155</start>
<end>163</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TMR: Evaluating NER Recall on Tough Mentions
%A Tu, Jingxuan
%A Lignos, Constantine
%Y Sorodoc, Ionut-Teodor
%Y Sushil, Madhumita
%Y Takmaz, Ece
%Y Agirre, Eneko
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F tu-lignos-2021-tmr
%X We propose the Tough Mentions Recall (TMR) metrics to supplement traditional named entity recognition (NER) evaluation by examining recall on specific subsets of “tough” mentions: unseen mentions, those whose tokens or token/type combination were not observed in training, and type-confusable mentions, token sequences with multiple entity types in the test data. We demonstrate the usefulness of these metrics by evaluating corpora of English, Spanish, and Dutch using five recent neural architectures. We identify subtle differences between the performance of BERT and Flair on two English NER corpora and identify a weak spot in the performance of current models in Spanish. We conclude that the TMR metrics enable differentiation between otherwise similar-scoring systems and identification of patterns in performance that would go unnoticed from overall precision, recall, and F1.
%R 10.18653/v1/2021.eacl-srw.21
%U https://aclanthology.org/2021.eacl-srw.21/
%U https://doi.org/10.18653/v1/2021.eacl-srw.21
%P 155-163
Markdown (Informal)
[TMR: Evaluating NER Recall on Tough Mentions](https://aclanthology.org/2021.eacl-srw.21/) (Tu & Lignos, EACL 2021)
ACL
Jingxuan Tu and Constantine Lignos. 2021. TMR: Evaluating NER Recall on Tough Mentions. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 155–163, Online. Association for Computational Linguistics.
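
The abstract defines two families of "tough" gold mentions (unseen and type-confusable) and evaluates recall restricted to them. The sketch below illustrates that idea under simplifying assumptions: mentions are unique (token sequence, entity type) pairs without document positions, and all function names are illustrative rather than the authors' released code.

```python
# Minimal sketch of Tough Mentions Recall (TMR): ordinary mention-level recall,
# but computed only over a "tough" subset of the gold mentions.
# Assumption: a mention is a unique (token sequence, entity type) pair; a full
# implementation would score every gold mention occurrence with its position.
from collections import defaultdict
from typing import Set, Tuple

Mention = Tuple[Tuple[str, ...], str]  # (token sequence, entity type)


def unseen_mentions(train: Set[Mention], test: Set[Mention]) -> Set[Mention]:
    """Gold test mentions whose token sequence never occurs in training.

    The paper also considers the stricter token/type variant, where the
    (tokens, type) pair itself is unseen; swap the membership test for that.
    """
    seen_tokens = {tokens for tokens, _ in train}
    return {m for m in test if m[0] not in seen_tokens}


def type_confusable_mentions(test: Set[Mention]) -> Set[Mention]:
    """Gold test mentions whose token sequence appears with more than one
    entity type within the test data."""
    types_by_tokens = defaultdict(set)
    for tokens, etype in test:
        types_by_tokens[tokens].add(etype)
    return {m for m in test if len(types_by_tokens[m[0]]) > 1}


def subset_recall(gold_subset: Set[Mention], predicted: Set[Mention]) -> float:
    """Recall over a chosen subset of gold mentions (exact span + type match)."""
    if not gold_subset:
        return float("nan")
    return len(gold_subset & predicted) / len(gold_subset)
```

For example, `subset_recall(unseen_mentions(train_gold, test_gold), system_predictions)` would give an unseen-mention recall in the spirit of TMR, while `subset_recall(type_confusable_mentions(test_gold), system_predictions)` targets type-confusable mentions.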