% Conference paper record (short paper, pages 488--496 of the host proceedings).
% Braced words in the title keep their capitalisation under sentence-casing
% styles: BibTeX preserves a capital after a colon but not after a question
% mark, so the subtitle's first word is protected along with the proper noun
% and the acronym.
@inproceedings{ficek-etal-2022-tackle,
    title     = {How to tackle an emerging topic? {Combining} strong and weak labels for {Covid} news {NER}},
    author    = {Ficek, Aleksander and
                 Liu, Fangyu and
                 Collier, Nigel},
    booktitle = {Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
    month     = nov,
    year      = {2022},
    address   = {Online only},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2022.aacl-short.60},
    pages     = {488--496},
    abstract  = {Being able to train Named Entity Recognition (NER) models for emerging topics is crucial for many real-world applications especially in the medical domain where new topics are continuously evolving out of the scope of existing models and datasets. For a realistic evaluation setup, we introduce a novel COVID-19 news NER dataset (COVIDNEWS-NER) and release 3000 entries of hand annotated strongly labelled sentences and 13000 auto-generated weakly labelled sentences. Besides the dataset, we propose CONTROSTER, a recipe to strategically combine weak and strong labels in improving NER in an emerging topic through transfer learning. We show the effectiveness of CONTROSTER on COVIDNEWS-NER while providing analysis on combining weak and strong labels for training. Our key findings are: (1) Using weak data to formulate an initial backbone before tuning on strong data outperforms methods trained on only strong or weak data. (2) A combination of out-of-domain and in-domain weak label training is crucial and can overcome saturation when being training on weak labels from a single source.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record: a conference paper, with its proceedings volume described in the host relatedItem. -->
  <mods ID="ficek-etal-2022-tackle">
    <titleInfo>
      <title>How to tackle an emerging topic? Combining strong and weak labels for Covid news NER</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Aleksander</namePart>
      <namePart type="family">Ficek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fangyu</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nigel</namePart>
      <namePart type="family">Collier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online only</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Being able to train Named Entity Recognition (NER) models for emerging topics is crucial for many real-world applications especially in the medical domain where new topics are continuously evolving out of the scope of existing models and datasets. For a realistic evaluation setup, we introduce a novel COVID-19 news NER dataset (COVIDNEWS-NER) and release 3000 entries of hand annotated strongly labelled sentences and 13000 auto-generated weakly labelled sentences. Besides the dataset, we propose CONTROSTER, a recipe to strategically combine weak and strong labels in improving NER in an emerging topic through transfer learning. We show the effectiveness of CONTROSTER on COVIDNEWS-NER while providing analysis on combining weak and strong labels for training. Our key findings are: (1) Using weak data to formulate an initial backbone before tuning on strong data outperforms methods trained on only strong or weak data. (2) A combination of out-of-domain and in-domain weak label training is crucial and can overcome saturation when being training on weak labels from a single source.</abstract>
    <identifier type="citekey">ficek-etal-2022-tackle</identifier>
    <location>
      <url>https://aclanthology.org/2022.aacl-short.60</url>
    </location>
    <part>
      <date>2022-11</date>
      <extent unit="page">
        <start>488</start>
        <end>496</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How to tackle an emerging topic? Combining strong and weak labels for Covid news NER
%A Ficek, Aleksander
%A Liu, Fangyu
%A Collier, Nigel
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F ficek-etal-2022-tackle
%X Being able to train Named Entity Recognition (NER) models for emerging topics is crucial for many real-world applications especially in the medical domain where new topics are continuously evolving out of the scope of existing models and datasets. For a realistic evaluation setup, we introduce a novel COVID-19 news NER dataset (COVIDNEWS-NER) and release 3000 entries of hand annotated strongly labelled sentences and 13000 auto-generated weakly labelled sentences. Besides the dataset, we propose CONTROSTER, a recipe to strategically combine weak and strong labels in improving NER in an emerging topic through transfer learning. We show the effectiveness of CONTROSTER on COVIDNEWS-NER while providing analysis on combining weak and strong labels for training. Our key findings are: (1) Using weak data to formulate an initial backbone before tuning on strong data outperforms methods trained on only strong or weak data. (2) A combination of out-of-domain and in-domain weak label training is crucial and can overcome saturation when being training on weak labels from a single source.
%U https://aclanthology.org/2022.aacl-short.60
%P 488-496
Markdown (Informal)
[How to tackle an emerging topic? Combining strong and weak labels for Covid news NER](https://aclanthology.org/2022.aacl-short.60) (Ficek et al., AACL-IJCNLP 2022)
ACL
- Aleksander Ficek, Fangyu Liu, and Nigel Collier. 2022. How to tackle an emerging topic? Combining strong and weak labels for Covid news NER. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 488–496, Online only. Association for Computational Linguistics.