% PanPhon resource paper, COLING 2016 technical papers (key kept stable for existing citations).
@InProceedings{mortensen-EtAl:2016:COLING,
  author    = {Mortensen, David R. and Littell, Patrick and Bharadwaj, Akash and Goyal, Kartik and Dyer, Chris and Levin, Lori},
  title     = {{PanPhon}: A Resource for Mapping {IPA} Segments to Articulatory Feature Vectors},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3475--3484},
  abstract  = {This paper contributes to a growing body of evidence that---when coupled with
	appropriate machine-learning techniques---linguistically motivated,
	information-rich representations can outperform one-hot encodings of linguistic
	data. In particular, we show that phonological features outperform
	character-based models. PanPhon is a database relating over 5,000 IPA segments
	to 21 subsegmental articulatory features. We show that this database boosts
	performance in various NER-related tasks. Phonologically aware, neural CRF
	models built on PanPhon features are able to perform better on monolingual
	Spanish and Turkish NER tasks than character-based models. They have also been
	shown to work well in transfer models (as between Uzbek and Turkish). PanPhon
	features also contribute measurably to Orthography-to-IPA conversion tasks.},
  url       = {http://aclweb.org/anthology/C16-1328},
}

