% ACL Anthology record N19-1247: conference paper (Liu et al., NAACL 2019).
% Case-protected words are braced as whole words so styles that recase titles keep them intact.
% NOTE(review): `address` appears to hold the conference location rather than the
% publisher's city -- confirm against the target bibliography style.
@inproceedings{liu-etal-2019-encoding,
  title     = {An Encoding Strategy Based Word-Character {LSTM} for {Chinese} {NER}},
  author    = {Liu, Wei and
               Xu, Tongge and
               Xu, Qinghua and
               Song, Jiayu and
               Zu, Yueran},
  editor    = {Burstein, Jill and
               Doran, Christy and
               Solorio, Thamar},
  booktitle = {Proceedings of the 2019 Conference of the North {American} Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/N19-1247},
  doi       = {10.18653/v1/N19-1247},
  pages     = {2379--2389},
  abstract  = {A recently proposed lattice model has demonstrated that words in character sequence can provide rich word boundary information for character-based Chinese NER model. In this model, word information is integrated into a shortcut path between the start and the end characters of the word. However, the existence of shortcut path may cause the model to degenerate into a partial word-based model, which will suffer from word segmentation errors. Furthermore, the lattice model can not be trained in batches due to its DAG structure. In this paper, we propose a novel word-character LSTM(WC-LSTM) model to add word information into the start or the end character of the word, alleviating the influence of word segmentation errors while obtaining the word boundary information. Four different strategies are explored in our model to encode word information into a fixed-sized representation for efficient batch training. Experiments on benchmark datasets show that our proposed model outperforms other state-of-the-arts models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey liu-etal-2019-encoding: the paper, its five authors,
     and the host proceedings (with three editors) it appears in. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2019-encoding">
    <titleInfo>
      <title>An Encoding Strategy Based Word-Character LSTM for Chinese NER</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Wei</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tongge</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Qinghua</namePart>
      <namePart type="family">Xu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jiayu</namePart>
      <namePart type="family">Song</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yueran</namePart>
      <namePart type="family">Zu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jill</namePart>
        <namePart type="family">Burstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christy</namePart>
        <namePart type="family">Doran</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thamar</namePart>
        <namePart type="family">Solorio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, Minnesota</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A recently proposed lattice model has demonstrated that words in character sequence can provide rich word boundary information for character-based Chinese NER model. In this model, word information is integrated into a shortcut path between the start and the end characters of the word. However, the existence of shortcut path may cause the model to degenerate into a partial word-based model, which will suffer from word segmentation errors. Furthermore, the lattice model can not be trained in batches due to its DAG structure. In this paper, we propose a novel word-character LSTM(WC-LSTM) model to add word information into the start or the end character of the word, alleviating the influence of word segmentation errors while obtaining the word boundary information. Four different strategies are explored in our model to encode word information into a fixed-sized representation for efficient batch training. Experiments on benchmark datasets show that our proposed model outperforms other state-of-the-arts models.</abstract>
    <!-- Identifiers and canonical location -->
    <identifier type="citekey">liu-etal-2019-encoding</identifier>
    <identifier type="doi">10.18653/v1/N19-1247</identifier>
    <location>
      <url>https://aclanthology.org/N19-1247</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>2379</start>
        <end>2389</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T An Encoding Strategy Based Word-Character LSTM for Chinese NER
%A Liu, Wei
%A Xu, Tongge
%A Xu, Qinghua
%A Song, Jiayu
%A Zu, Yueran
%Y Burstein, Jill
%Y Doran, Christy
%Y Solorio, Thamar
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F liu-etal-2019-encoding
%X A recently proposed lattice model has demonstrated that words in character sequence can provide rich word boundary information for character-based Chinese NER model. In this model, word information is integrated into a shortcut path between the start and the end characters of the word. However, the existence of shortcut path may cause the model to degenerate into a partial word-based model, which will suffer from word segmentation errors. Furthermore, the lattice model can not be trained in batches due to its DAG structure. In this paper, we propose a novel word-character LSTM(WC-LSTM) model to add word information into the start or the end character of the word, alleviating the influence of word segmentation errors while obtaining the word boundary information. Four different strategies are explored in our model to encode word information into a fixed-sized representation for efficient batch training. Experiments on benchmark datasets show that our proposed model outperforms other state-of-the-arts models.
%R 10.18653/v1/N19-1247
%U https://aclanthology.org/N19-1247
%U https://doi.org/10.18653/v1/N19-1247
%P 2379-2389
Markdown (Informal)
[An Encoding Strategy Based Word-Character LSTM for Chinese NER](https://aclanthology.org/N19-1247) (Liu et al., NAACL 2019)
ACL
- Wei Liu, Tongge Xu, Qinghua Xu, Jiayu Song, and Yueran Zu. 2019. An Encoding Strategy Based Word-Character LSTM for Chinese NER. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 2379–2389, Minneapolis, Minnesota. Association for Computational Linguistics.