% Workshop paper from WNUT 2016 (ACL Anthology ID W16-3918).
% Title words in braces are case-protected against sentence-casing styles;
% month uses the predefined BibTeX macro so styles can format/localise it.
@InProceedings{ikeda-shindo-matsumoto:2016:WNUT,
  author    = {Ikeda, Taishi  and  Shindo, Hiroyuki  and  Matsumoto, Yuji},
  title     = {{Japanese} Text Normalization with Encoder-Decoder Model},
  booktitle = {Proceedings of the 2nd Workshop on Noisy User-generated Text ({WNUT})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {129--137},
  abstract  = {Text normalization is the task of transforming lexical variants to their
	canonical forms.
	We model the problem of text normalization as a character-level sequence to
	sequence learning problem
	and present a neural encoder-decoder model for solving it.
	To train the encoder-decoder model, many sentences pairs are generally
	required.
	However, Japanese non-standard canonical pairs are scarce in the form of
	parallel corpora.
	To address this issue, we propose a method of data augmentation to increase
	data size
	by converting existing resources into synthesized non-standard forms using
	handcrafted rules.
	We conducted an experiment to demonstrate that the synthesized corpus
	contributes to stably train an encoder-decoder model and improve the
	performance of Japanese text normalization.},
  url       = {https://aclanthology.org/W16-3918/}
}

