@inproceedings{meng-etal-2021-gemnet,
title = "{GEMNET}: Effective Gated Gazetteer Representations for Recognizing Complex Entities in Low-context Input",
author = "Meng, Tao and
Fang, Anjie and
Rokhlenko, Oleg and
Malmasi, Shervin",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.118/",
doi = "10.18653/v1/2021.naacl-main.118",
pages = "1499--1512",
abstract = "Named Entity Recognition (NER) remains difficult in real-world settings; current challenges include short texts (low context), emerging entities, and complex entities (e.g. movie names). Gazetteer features can help, but results have been mixed due to challenges with adding extra features, and a lack of realistic evaluation data. It has been shown that including gazetteer features can cause models to overuse or underuse them, leading to poor generalization. We propose GEMNET, a novel approach for gazetteer knowledge integration, including (1) a flexible Contextual Gazetteer Representation (CGR) encoder that can be fused with any word-level model; and (2) a Mixture-of- Experts gating network that overcomes the feature overuse issue by learning to conditionally combine the context and gazetteer features, instead of assigning them fixed weights. To comprehensively evaluate our approaches, we create 3 large NER datasets (24M tokens) reflecting current challenges. In an uncased setting, our methods show large gains (up to +49{\%} F1) in recognizing difficult entities compared to existing baselines. On standard benchmarks, we achieve a new uncased SOTA on CoNLL03 and WNUT17."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="meng-etal-2021-gemnet">
<titleInfo>
<title>GEMNET: Effective Gated Gazetteer Representations for Recognizing Complex Entities in Low-context Input</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tao</namePart>
<namePart type="family">Meng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjie</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleg</namePart>
<namePart type="family">Rokhlenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Toutanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Named Entity Recognition (NER) remains difficult in real-world settings; current challenges include short texts (low context), emerging entities, and complex entities (e.g. movie names). Gazetteer features can help, but results have been mixed due to challenges with adding extra features, and a lack of realistic evaluation data. It has been shown that including gazetteer features can cause models to overuse or underuse them, leading to poor generalization. We propose GEMNET, a novel approach for gazetteer knowledge integration, including (1) a flexible Contextual Gazetteer Representation (CGR) encoder that can be fused with any word-level model; and (2) a Mixture-of- Experts gating network that overcomes the feature overuse issue by learning to conditionally combine the context and gazetteer features, instead of assigning them fixed weights. To comprehensively evaluate our approaches, we create 3 large NER datasets (24M tokens) reflecting current challenges. In an uncased setting, our methods show large gains (up to +49% F1) in recognizing difficult entities compared to existing baselines. On standard benchmarks, we achieve a new uncased SOTA on CoNLL03 and WNUT17.</abstract>
<identifier type="citekey">meng-etal-2021-gemnet</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-main.118</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-main.118/</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>1499</start>
<end>1512</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GEMNET: Effective Gated Gazetteer Representations for Recognizing Complex Entities in Low-context Input
%A Meng, Tao
%A Fang, Anjie
%A Rokhlenko, Oleg
%A Malmasi, Shervin
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F meng-etal-2021-gemnet
%X Named Entity Recognition (NER) remains difficult in real-world settings; current challenges include short texts (low context), emerging entities, and complex entities (e.g. movie names). Gazetteer features can help, but results have been mixed due to challenges with adding extra features, and a lack of realistic evaluation data. It has been shown that including gazetteer features can cause models to overuse or underuse them, leading to poor generalization. We propose GEMNET, a novel approach for gazetteer knowledge integration, including (1) a flexible Contextual Gazetteer Representation (CGR) encoder that can be fused with any word-level model; and (2) a Mixture-of- Experts gating network that overcomes the feature overuse issue by learning to conditionally combine the context and gazetteer features, instead of assigning them fixed weights. To comprehensively evaluate our approaches, we create 3 large NER datasets (24M tokens) reflecting current challenges. In an uncased setting, our methods show large gains (up to +49% F1) in recognizing difficult entities compared to existing baselines. On standard benchmarks, we achieve a new uncased SOTA on CoNLL03 and WNUT17.
%R 10.18653/v1/2021.naacl-main.118
%U https://aclanthology.org/2021.naacl-main.118/
%U https://doi.org/10.18653/v1/2021.naacl-main.118
%P 1499-1512
Markdown (Informal)
[GEMNET: Effective Gated Gazetteer Representations for Recognizing Complex Entities in Low-context Input](https://aclanthology.org/2021.naacl-main.118/) (Meng et al., NAACL 2021)
ACL