% Kajiwara and Komachi, COLING 2016 technical paper: unsupervised construction
% of a monolingual parallel corpus for text simplification using sentence
% similarity computed from word-embedding alignments.
% Notes: month uses the predefined BibTeX macro so styles can localise it;
% the acronym in booktitle is braced so case-changing styles keep it intact;
% the abstract is kept verbatim from the published paper.
@InProceedings{kajiwara-komachi:2016:COLING,
  author    = {Kajiwara, Tomoyuki and Komachi, Mamoru},
  title     = {Building a Monolingual Parallel Corpus for Text Simplification Using Sentence Similarity Based on Alignment between Word Embeddings},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1147--1158},
  abstract  = {Methods for text simplification using the framework of statistical machine
    translation have been extensively studied in recent years.
    However, building the monolingual parallel corpus necessary for training the
    model requires costly human annotation.
    Monolingual parallel corpora for text simplification have therefore been built
    only for a limited number of languages, such as English and Portuguese.
    To obviate the need for human annotation, we propose an unsupervised method
    that automatically builds the monolingual parallel corpus for text
    simplification using sentence similarity based on word embeddings.
    For any sentence pair comprising a complex sentence and its simple counterpart,
    we employ a many-to-one method of aligning each word in the complex sentence
    with the most similar word in the simple sentence and compute sentence
    similarity by averaging these word similarities.
    The experimental results demonstrate the excellent performance of the proposed
    method in a monolingual parallel corpus construction task for English text
    simplification.
    The results also demonstrated the superior accuracy in text simplification that
    use the framework of statistical machine translation trained using the corpus
    built by the proposed method to that using the existing corpora.},
  url       = {https://aclanthology.org/C16-1109/},
}

