% EMNLP 2017 conference paper on multi-grained Chinese word segmentation (ACL Anthology D17-1072).
% The address field holds the conference venue, as exported by the ACL Anthology.
@InProceedings{gong-EtAl:2017:EMNLP2017,
  author    = {Gong, Chen and Li, Zhenghua and Zhang, Min and Jiang, Xinzhou},
  title     = {Multi-Grained {Chinese} Word Segmentation},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {692--703},
  abstract  = {Traditionally, word segmentation (WS) adopts the single-grained formalism,
               where a sentence corresponds to a single word sequence. However, Sproat et al.
               (1997) show that the inter-native-speaker consistency ratio over Chinese word
               boundaries is only 76\%, indicating single-grained WS (SWS) imposes unnecessary
               challenges on both manual annotation and statistical modeling.
               Moreover, WS results of different granularities can be complementary and
               beneficial for high-level applications.
               This work proposes and addresses multi-grained WS (MWS). We build a large-scale
               pseudo MWS dataset for model training and tuning by leveraging the annotation
               heterogeneity of three SWS datasets.
               Then we manually annotate 1,500 test sentences with true MWS annotations.
               Finally, we propose three benchmark approaches by casting MWS as constituent
               parsing and sequence labeling.
               Experiments and analysis lead to many interesting findings.},
  url       = {https://www.aclweb.org/anthology/D17-1072}
}

