@InProceedings{vijayakumar-vedantam-parikh:2017:EMNLP2017,
  author    = {Vijayakumar, Ashwin  and  Vedantam, Ramakrishna  and  Parikh, Devi},
  title     = {Sound-Word2Vec: Learning Word Representations Grounded in Sounds},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = {September},
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {920--925},
  abstract  = {To be able to interact better with humans, it is crucial for machines to
	understand sound -- a primary modality of human perception. Previous works
	have used sound to learn embeddings for improved generic semantic similarity
	assessment. In this work, we treat sound as a first-class citizen, studying
	downstream textual tasks which require aural grounding. To this end, we
	propose sound-word2vec -- a new embedding scheme that learns specialized word
	embeddings grounded in sounds. For example, we learn that two seemingly
	(semantically) unrelated concepts, like leaves and paper, are similar due to
	the similar rustling sounds they make. Our embeddings prove useful in textual
	tasks requiring aural reasoning like text-based sound retrieval and discovering
	Foley sound effects (used in movies). Moreover, our embedding space captures
	interesting dependencies between words and onomatopoeia and outperforms prior
	work on aurally-relevant word relatedness datasets such as AMEN and ASLex.},
  url       = {https://www.aclweb.org/anthology/D17-1096}
}