% Conference paper (EACL 2017, Volume 1, Long Papers) on cognate identification
% in multilingual wordlists using support vector machines.
% The umlaut in the first author's surname is written as a braced accent group
% so that BibTeX treats it as a single letter when sorting and building labels;
% the month uses the predefined three-letter macro so the style controls its rendering.
@InProceedings{jager-list-sofroniev:2017:EACLlong,
  author    = {J{\"a}ger, Gerhard and List, Johann-Mattis and Sofroniev, Pavel},
  title     = {Using support vector machines and state-of-the-art algorithms for phonetic alignment to identify cognates in multi-lingual wordlists},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {1205--1216},
  abstract  = {Most current approaches in phylogenetic linguistics require as input
	multilingual word lists partitioned into sets of etymologically related words
	(cognates). Cognate identification is so far done manually by experts, which is
	time consuming and as of yet only available for a small number of well-studied
	language families.  Automatizing this step will greatly expand the empirical
	scope of phylogenetic methods in linguistics, as raw wordlists (in phonetic
	transcription) are much easier to obtain than wordlists in which cognate words
	have been fully identified and annotated, even for under-studied languages.  A
	couple of different methods have been proposed in the past, but they are either
	disappointing regarding their performance or not applicable to larger datasets.
	 Here we present a new approach that uses support vector machines to unify
	different state-of-the-art methods for phonetic alignment and cognate detection
	within a single framework. Training and evaluating these method on a
	typologically broad collection of gold-standard data shows it to be superior to
	the existing state of the art.},
  url       = {http://www.aclweb.org/anthology/E17-1113}
}

