% Workshop paper, Speech-Centric NLP workshop proceedings, 2017.
% The month is given as the standard unquoted macro so the bibliography style
% decides how to abbreviate or localize it.
@InProceedings{anastasopoulos-EtAl:2017:Speech-Centric,
  author    = {Anastasopoulos, Antonios and Bansal, Sameer and Chiang, David and Goldwater, Sharon and Lopez, Adam},
  title     = {Spoken Term Discovery for Language Documentation using Translations},
  booktitle = {Proceedings of the Workshop on Speech-Centric Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {53--58},
  abstract  = {Vast amounts of speech data collected for language documentation and research
	remain untranscribed and unsearchable, but often a small amount of speech may
	have text translations available. We present a method for partially labeling
	additional speech with translations in this scenario. We modify an unsupervised
	speech-to-translation alignment model and obtain prototype speech segments that
	match the translation words, which are in turn used to discover terms in the
	unlabelled data. We evaluate our method on a Spanish-English speech translation
	corpus and on two corpora of endangered languages, Arapaho and Ainu,
	demonstrating its appropriateness and applicability in an actual
	very-low-resource scenario.},
  url       = {http://www.aclweb.org/anthology/W17-4607},
}

