@inproceedings{huang-etal-2020-joint-multiple,
title = "A Joint Multiple Criteria Model in Transfer Learning for Cross-domain {C}hinese Word Segmentation",
author = "Huang, Kaiyu and
Huang, Degen and
Liu, Zhuang and
Mo, Fengran",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.318",
doi = "10.18653/v1/2020.emnlp-main.318",
pages = "3873--3882",
abstract = "Word-level information is important in natural language processing (NLP), especially for the Chinese language due to its high linguistic complexity. Chinese word segmentation (CWS) is an essential task for Chinese downstream NLP tasks. Existing methods have already achieved a competitive performance for CWS on large-scale annotated corpora. However, the accuracy of the method will drop dramatically when it handles an unsegmented text with lots of out-of-vocabulary (OOV) words. In addition, there are many different segmentation criteria for addressing different requirements of downstream NLP tasks. Excessive amounts of models with saving different criteria will generate the explosive growth of the total parameters. To this end, we propose a joint multiple criteria model that shares all parameters to integrate different segmentation criteria into one model. Besides, we utilize a transfer learning method to improve the performance of OOV words. Our proposed method is evaluated by designing comprehensive experiments on multiple benchmark datasets (e.g., Bakeoff 2005, Bakeoff 2008 and SIGHAN 2010). Our method achieves the state-of-the-art performances on all datasets. Importantly, our method also shows a competitive practicability and generalization ability for the CWS task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2020-joint-multiple">
<titleInfo>
<title>A Joint Multiple Criteria Model in Transfer Learning for Cross-domain Chinese Word Segmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaiyu</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Degen</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhuang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fengran</namePart>
<namePart type="family">Mo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word-level information is important in natural language processing (NLP), especially for the Chinese language due to its high linguistic complexity. Chinese word segmentation (CWS) is an essential task for Chinese downstream NLP tasks. Existing methods have already achieved a competitive performance for CWS on large-scale annotated corpora. However, the accuracy of the method will drop dramatically when it handles an unsegmented text with lots of out-of-vocabulary (OOV) words. In addition, there are many different segmentation criteria for addressing different requirements of downstream NLP tasks. Excessive amounts of models with saving different criteria will generate the explosive growth of the total parameters. To this end, we propose a joint multiple criteria model that shares all parameters to integrate different segmentation criteria into one model. Besides, we utilize a transfer learning method to improve the performance of OOV words. Our proposed method is evaluated by designing comprehensive experiments on multiple benchmark datasets (e.g., Bakeoff 2005, Bakeoff 2008 and SIGHAN 2010). Our method achieves the state-of-the-art performances on all datasets. Importantly, our method also shows a competitive practicability and generalization ability for the CWS task.</abstract>
<identifier type="citekey">huang-etal-2020-joint-multiple</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.318</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.318</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>3873</start>
<end>3882</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Joint Multiple Criteria Model in Transfer Learning for Cross-domain Chinese Word Segmentation
%A Huang, Kaiyu
%A Huang, Degen
%A Liu, Zhuang
%A Mo, Fengran
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F huang-etal-2020-joint-multiple
%X Word-level information is important in natural language processing (NLP), especially for the Chinese language due to its high linguistic complexity. Chinese word segmentation (CWS) is an essential task for Chinese downstream NLP tasks. Existing methods have already achieved a competitive performance for CWS on large-scale annotated corpora. However, the accuracy of the method will drop dramatically when it handles an unsegmented text with lots of out-of-vocabulary (OOV) words. In addition, there are many different segmentation criteria for addressing different requirements of downstream NLP tasks. Excessive amounts of models with saving different criteria will generate the explosive growth of the total parameters. To this end, we propose a joint multiple criteria model that shares all parameters to integrate different segmentation criteria into one model. Besides, we utilize a transfer learning method to improve the performance of OOV words. Our proposed method is evaluated by designing comprehensive experiments on multiple benchmark datasets (e.g., Bakeoff 2005, Bakeoff 2008 and SIGHAN 2010). Our method achieves the state-of-the-art performances on all datasets. Importantly, our method also shows a competitive practicability and generalization ability for the CWS task.
%R 10.18653/v1/2020.emnlp-main.318
%U https://aclanthology.org/2020.emnlp-main.318
%U https://doi.org/10.18653/v1/2020.emnlp-main.318
%P 3873-3882
Markdown (Informal)
[A Joint Multiple Criteria Model in Transfer Learning for Cross-domain Chinese Word Segmentation](https://aclanthology.org/2020.emnlp-main.318) (Huang et al., EMNLP 2020)
ACL