@inproceedings{lester-etal-2020-constrained,
title = "Constrained Decoding for Computationally Efficient Named Entity Recognition Taggers",
author = "Lester, Brian and
Pressel, Daniel and
Hemmeter, Amy and
Ray Choudhury, Sagnik and
Bangalore, Srinivas",
editor = "Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2020",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.findings-emnlp.166",
doi = "10.18653/v1/2020.findings-emnlp.166",
pages = "1841--1848",
abstract = "Current state-of-the-art models for named entity recognition (NER) are neural models with a conditional random field (CRF) as the final layer. Entities are represented as per-token labels with a special structure in order to decode them into spans. Current work eschews prior knowledge of how the span encoding scheme works and relies on the CRF learning which transitions are illegal and which are not to facilitate global coherence. We find that by constraining the output to suppress illegal transitions we can train a tagger with a cross-entropy loss twice as fast as a CRF with differences in F1 that are statistically insignificant, effectively eliminating the need for a CRF. We analyze the dynamics of tag co-occurrence to explain when these constraints are most effective and provide open source implementations of our tagger in both PyTorch and TensorFlow.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lester-etal-2020-constrained">
<titleInfo>
<title>Constrained Decoding for Computationally Efficient Named Entity Recognition Taggers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Lester</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Pressel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amy</namePart>
<namePart type="family">Hemmeter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sagnik</namePart>
<namePart type="family">Ray Choudhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivas</namePart>
<namePart type="family">Bangalore</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2020</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Current state-of-the-art models for named entity recognition (NER) are neural models with a conditional random field (CRF) as the final layer. Entities are represented as per-token labels with a special structure in order to decode them into spans. Current work eschews prior knowledge of how the span encoding scheme works and relies on the CRF learning which transitions are illegal and which are not to facilitate global coherence. We find that by constraining the output to suppress illegal transitions we can train a tagger with a cross-entropy loss twice as fast as a CRF with differences in F1 that are statistically insignificant, effectively eliminating the need for a CRF. We analyze the dynamics of tag co-occurrence to explain when these constraints are most effective and provide open source implementations of our tagger in both PyTorch and TensorFlow.</abstract>
<identifier type="citekey">lester-etal-2020-constrained</identifier>
<identifier type="doi">10.18653/v1/2020.findings-emnlp.166</identifier>
<location>
<url>https://aclanthology.org/2020.findings-emnlp.166</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>1841</start>
<end>1848</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Constrained Decoding for Computationally Efficient Named Entity Recognition Taggers
%A Lester, Brian
%A Pressel, Daniel
%A Hemmeter, Amy
%A Ray Choudhury, Sagnik
%A Bangalore, Srinivas
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Findings of the Association for Computational Linguistics: EMNLP 2020
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lester-etal-2020-constrained
%X Current state-of-the-art models for named entity recognition (NER) are neural models with a conditional random field (CRF) as the final layer. Entities are represented as per-token labels with a special structure in order to decode them into spans. Current work eschews prior knowledge of how the span encoding scheme works and relies on the CRF learning which transitions are illegal and which are not to facilitate global coherence. We find that by constraining the output to suppress illegal transitions we can train a tagger with a cross-entropy loss twice as fast as a CRF with differences in F1 that are statistically insignificant, effectively eliminating the need for a CRF. We analyze the dynamics of tag co-occurrence to explain when these constraints are most effective and provide open source implementations of our tagger in both PyTorch and TensorFlow.
%R 10.18653/v1/2020.findings-emnlp.166
%U https://aclanthology.org/2020.findings-emnlp.166
%U https://doi.org/10.18653/v1/2020.findings-emnlp.166
%P 1841-1848
Markdown (Informal)
[Constrained Decoding for Computationally Efficient Named Entity Recognition Taggers](https://aclanthology.org/2020.findings-emnlp.166) (Lester et al., Findings 2020)
ACL