% Li, Danilevsky, Noeman and Li (2018): the DIMSIM Chinese phonetic similarity
% paper, pp. 444--453 of the Proceedings of the 22nd Conference on
% Computational Natural Language Learning.
% Maintenance notes: `month` uses the predefined macro (unbraced) so styles can
% abbreviate/localise it; {DIMSIM} and {Chinese} are brace-protected so
% sentence-casing styles do not lowercase them.
@InProceedings{li-EtAl:2018:K18-1,
  author    = {Li, Min  and  Danilevsky, Marina  and  Noeman, Sara  and  Li, Yunyao},
  title     = {{DIMSIM}: An Accurate {Chinese} Phonetic Similarity Algorithm Based on Learned High Dimensional Encoding},
  booktitle = {Proceedings of the 22nd Conference on Computational Natural Language Learning},
  month     = oct,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {444--453},
  abstract  = {Phonetic similarity algorithms identify words and phrases with similar pronunciation which are used in many natural language processing tasks. However, existing approaches are designed mainly for Indo-European languages and fail to capture the unique properties of Chinese pronunciation. In this paper, we propose a high dimensional encoded phonetic similarity algorithm for Chinese, DIMSIM. The encodings are learned from annotated data to separately map initial and final phonemes into n-dimensional coordinates. Pinyin phonetic similarities are then calculated by aggregating the similarities of initial, final and tone. DIMSIM demonstrates a 7.5X improvement on mean reciprocal rank over the state-of-the-art phonetic similarity approaches.},
  url       = {http://www.aclweb.org/anthology/K18-1043}
}

