<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W17">
  <paper id="5300">
    <title>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</title>
    <editor>Samuel Bowman</editor>
    <editor>Yoav Goldberg</editor>
    <editor>Felix Hill</editor>
    <editor>Angeliki Lazaridou</editor>
    <editor>Omer Levy</editor>
    <editor>Roi Reichart</editor>
    <editor>Anders Søgaard</editor>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <url>http://www.aclweb.org/anthology/W17-53</url>
    <bibtype>book</bibtype>
    <bibkey>RepEval:2017</bibkey>
  </paper>

  <paper id="5301">
    <title>The RepEval 2017 Shared Task: Multi-Genre Natural Language Inference with Sentence Representations</title>
    <author><first>Nikita</first><last>Nangia</last></author>
    <author><first>Adina</first><last>Williams</last></author>
    <author><first>Angeliki</first><last>Lazaridou</last></author>
    <author><first>Samuel</first><last>Bowman</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>1&#8211;10</pages>
    <url>http://www.aclweb.org/anthology/W17-5301</url>
    <abstract>This paper presents the results of the RepEval 2017 Shared Task, which
	evaluated neural network sentence representation learning models on the
	Multi-Genre Natural Language Inference corpus (MultiNLI) recently introduced by
	Williams et al. (2017). All of the five participating teams beat the
	bidirectional LSTM (BiLSTM) and continuous bag of words baselines reported in
	Williams et al. The best single model used stacked BiLSTMs with residual
	connections to extract sentence features and reached 74.5% accuracy on the
	genre-matched test set. Surprisingly, the results of the competition were
	fairly consistent across the genre-matched and genre-mismatched test sets, and
	across subsets of the test data representing a variety of linguistic phenomena,
	suggesting that all of the submitted systems learned reasonably
	domain-independent representations for sentence meaning.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>nangia-EtAl:2017:RepEval</bibkey>
  </paper>

  <paper id="5302">
    <title>Traversal-Free Word Vector Evaluation in Analogy Space</title>
    <author><first>Xiaoyin</first><last>Che</last></author>
    <author><first>Nico</first><last>Ring</last></author>
    <author><first>Willi</first><last>Raschkowski</last></author>
    <author><first>Haojin</first><last>Yang</last></author>
    <author><first>Christoph</first><last>Meinel</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>11&#8211;15</pages>
    <url>http://www.aclweb.org/anthology/W17-5302</url>
    <abstract>In this paper, we propose an alternative evaluating metric for word analogy
	questions (A to B is as C to D) in word vector evaluation. Different from the
	traditional method which predicts the fourth word by the given three, we
	measure the similarity directly on the "relations" of two pairs of given words,
	just as shifting the relation vectors into a new analogy space. Cosine and
	Euclidean distances are then calculated as measurements. Observation and
	experiments show the proposed analogy space evaluation could offer a more
	comprehensive evaluating result on word vectors with word analogy questions.
	Meanwhile, computational complexity is remarkably reduced by avoiding
	traversing the vocabulary.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>che-EtAl:2017:RepEval</bibkey>
  </paper>

  <paper id="5303">
    <title>Hypothesis Testing based Intrinsic Evaluation of Word Embeddings</title>
    <author><first>Nishant</first><last>Gurnani</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>16&#8211;20</pages>
    <url>http://www.aclweb.org/anthology/W17-5303</url>
    <abstract>We introduce the cross-match test - an exact, distribution-free,
	high-dimensional hypothesis test as an intrinsic evaluation metric for word
	embeddings. We show that cross-match is an effective means of measuring the
	distributional similarity between different vector representations and of
	evaluating the statistical significance of different vector embedding models.
	Additionally, we find that cross-match can be used to provide a quantitative
	measure of linguistic similarity for selecting bridge languages for machine
	translation. We demonstrate that the results of the hypothesis test align with
	our expectations and note that the framework of two-sample hypothesis testing
	is not limited to word embeddings and can be extended to all vector
	representations.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>gurnani:2017:RepEval</bibkey>
  </paper>

  <paper id="5304">
    <title>Evaluation of word embeddings against cognitive processes: primed reaction times in lexical decision and naming tasks</title>
    <author><first>Jeremy</first><last>Auguste</last></author>
    <author><first>Arnaud</first><last>Rey</last></author>
    <author><first>Benoit</first><last>Favre</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>21&#8211;26</pages>
    <url>http://www.aclweb.org/anthology/W17-5304</url>
    <abstract>This work presents a framework for word similarity evaluation grounded on
	cognitive sciences experimental data. Word pair similarities are compared to
	reaction times of subjects in large scale lexical decision and naming tasks
	under semantic priming. Results show that GloVe embeddings lead to
	significantly higher correlation with experimental measurements than other
	controlled and off-the-shelf embeddings, and that the choice of a training
	corpus is less important than that of the algorithm. Comparison of rankings
	with other datasets shows that the cognitive phenomenon covers more aspects
	than simply word relatedness or similarity.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>auguste-rey-favre:2017:RepEval</bibkey>
  </paper>

  <paper id="5305">
    <title>Playing with Embeddings: Evaluating embeddings for Robot Language Learning through MUD Games</title>
    <author><first>Anmol</first><last>Gulati</last></author>
    <author><first>Kumar Krishna</first><last>Agrawal</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>27&#8211;30</pages>
    <url>http://www.aclweb.org/anthology/W17-5305</url>
    <abstract>Acquiring language provides a ubiquitous mode of communication, across humans
	and robots. To this effect, distributional representations of words based on
	co-occurrence statistics, have provided significant advancements ranging across
	machine translation to comprehension. In this paper, we study the suitability
	of using general purpose word-embeddings for language learning in robots. We
	propose using text-based games as a proxy to evaluating word embedding on real
	robots. Based in a risk-reward setting, we review the effectiveness of the
	embeddings in navigating tasks in fantasy games, as an approximation to their
	performance on more complex scenarios, like language assisted robot navigation.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>gulati-agrawal:2017:RepEval</bibkey>
  </paper>

  <paper id="5306">
    <title>Recognizing Textual Entailment in Twitter Using Word Embeddings</title>
    <author><first>Octavia-Maria</first><last>&#x15E;ulea</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>31&#8211;35</pages>
    <url>http://www.aclweb.org/anthology/W17-5306</url>
    <abstract>In this paper, we investigate the application of machine learning techniques
	and word embeddings to the task of Recognizing Textual Entailment (RTE) in
	Social Media. We look at a manually labeled dataset consisting of user
	generated short texts posted on Twitter (tweets) and related to four recent
	media events (the Charlie Hebdo shooting, the Ottawa shooting, the Sydney
	Siege, and the Germanwings crash) and test to what extent neural techniques
	and embeddings are able to distinguish between tweets that entail or contradict
	each other or that claim unrelated things. We obtain comparable results to the
	state of the art in a train-test setting, but we show that, due to the noisy
	aspect of the data, results plummet in an evaluation strategy crafted to better
	simulate a real-life train-test scenario.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>csulea:2017:RepEval</bibkey>
  </paper>

  <paper id="5307">
    <title>Recurrent Neural Network-Based Sentence Encoder with Gated Attention for Natural Language Inference</title>
    <author><first>Qian</first><last>Chen</last></author>
    <author><first>Xiaodan</first><last>Zhu</last></author>
    <author><first>Zhen-Hua</first><last>Ling</last></author>
    <author><first>Si</first><last>Wei</last></author>
    <author><first>Hui</first><last>Jiang</last></author>
    <author><first>Diana</first><last>Inkpen</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>36&#8211;40</pages>
    <url>http://www.aclweb.org/anthology/W17-5307</url>
    <abstract>The RepEval 2017 Shared Task aims to evaluate natural language understanding
	models for sentence representation, in which a sentence is represented as a
	fixed-length vector with neural networks and the quality of the representation
	is tested with a natural language inference task. This paper describes our
	system (alpha) that is ranked among the top in the Shared Task, on both the
	in-domain test set (obtaining a 74.9% accuracy) and on the cross-domain test
	set (also attaining a 74.9% accuracy), demonstrating that the model generalizes
	well to the cross-domain data. Our model is equipped with intra-sentence
	gated-attention composition which helps achieve a better performance. In
	addition to submitting our model to the Shared Task, we have also tested it on
	the Stanford Natural Language Inference (SNLI) dataset. We obtain an accuracy
	of 85.5%, which is the best reported result on SNLI when cross-sentence
	attention is not allowed, the same condition enforced in RepEval 2017.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>chen-EtAl:2017:RepEval</bibkey>
  </paper>

  <paper id="5308">
    <title>Shortcut-Stacked Sentence Encoders for Multi-Domain Inference</title>
    <author><first>Yixin</first><last>Nie</last></author>
    <author><first>Mohit</first><last>Bansal</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>41&#8211;45</pages>
    <url>http://www.aclweb.org/anthology/W17-5308</url>
    <abstract>We present a simple sequential sentence encoder for multi-domain natural
	language inference. Our encoder is based on stacked bidirectional LSTM-RNNs
	with shortcut connections and fine-tuning of word embeddings. The overall
	supervised model uses the above encoder to encode two input sentences into two
	vectors, and then uses a classifier over the vector combination to label the
	relationship between these two sentences as that of entailment, contradiction,
	or neutral. Our Shortcut-Stacked sentence encoders achieve strong improvements
	over existing encoders on matched and mismatched multi-domain natural language
	inference (top single-model result in the EMNLP RepEval 2017 Shared Task
	(Nangia et al., 2017)). Moreover, they achieve the new state-of-the-art
	encoding result on the original SNLI dataset (Bowman et al., 2015).</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>nie-bansal:2017:RepEval</bibkey>
  </paper>

  <paper id="5309">
    <title>Character-level Intra Attention Network for Natural Language Inference</title>
    <author><first>Han</first><last>Yang</last></author>
    <author><first>Marta R.</first><last>Costa-juss&#224;</last></author>
    <author><first>Jos&#233; A. R.</first><last>Fonollosa</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>46&#8211;50</pages>
    <url>http://www.aclweb.org/anthology/W17-5309</url>
    <abstract>Natural language inference (NLI) is a central problem in language
	understanding. End-to-end artificial neural networks have reached
	state-of-the-art performance in the NLI field recently. In this paper, we propose
	Character-level Intra Attention Network (CIAN) for the NLI task. In our model,
	we use the character-level convolutional network to replace the standard word
	embedding layer, and we use the intra attention to capture the intra-sentence
	semantics. The proposed CIAN model provides improved results based on a newly
	published MNLI corpus.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>yang-costajussa-fonollosa:2017:RepEval</bibkey>
  </paper>

  <paper id="5310">
    <title>Refining Raw Sentence Representations for Textual Entailment Recognition via Attention</title>
    <author><first>Jorge</first><last>Balazs</last></author>
    <author><first>Edison</first><last>Marrese-Taylor</last></author>
    <author><first>Pablo</first><last>Loyola</last></author>
    <author><first>Yutaka</first><last>Matsuo</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>51&#8211;55</pages>
    <url>http://www.aclweb.org/anthology/W17-5310</url>
    <abstract>In this paper we present the model used by the team Rivercorners for the 2017
	RepEval shared task. First, our model separately encodes a pair of sentences
	into variable-length representations by using a bidirectional LSTM. Later, it
	creates fixed-length raw representations by means of simple aggregation
	functions, which are then refined using an attention mechanism. Finally it
	combines the refined representations of both sentences into a single vector to
	be used for classification. With this model we obtained test accuracies of
	72.057% and 72.055% in the matched and mismatched evaluation tracks
	respectively, outperforming the LSTM baseline, and obtaining performances
	similar to a model that relies on shared information between sentences (ESIM).
	When using an ensemble both accuracies increased to 72.247% and 72.827%
	respectively.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>balazs-EtAl:2017:RepEval</bibkey>
  </paper>

  <paper id="5311">
    <title>LCT-MALTA's Submission to RepEval 2017 Shared Task</title>
    <author><first>Hoa</first><last>Vu</last></author>
    <booktitle>Proceedings of the 2nd Workshop on Evaluating Vector Space Representations for NLP</booktitle>
    <month>September</month>
    <year>2017</year>
    <address>Copenhagen, Denmark</address>
    <publisher>Association for Computational Linguistics</publisher>
    <pages>56&#8211;60</pages>
    <url>http://www.aclweb.org/anthology/W17-5311</url>
    <abstract>System using BiLSTM and max pooling. Embedding is enhanced by POS, character
	and dependency info.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>vu:2017:RepEval</bibkey>
  </paper>

</volume>

