<?xml version="1.0" encoding="UTF-8" ?>
<volume id="W17">
  <paper id="5600">
    <title>Proceedings of the First Workshop on Curation and Applications of Parallel and Comparable Corpora</title>
    <editor>Haithem Afli</editor>
    <editor>Chao-Hong Liu</editor>
    <month>November</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <url>http://www.aclweb.org/anthology/W17-56</url>
    <bibtype>book</bibtype>
    <bibkey>Cupral:2017</bibkey>
  </paper>

  <paper id="5601">
    <title>Building a Better Bitext for Structurally Different Languages through Self-training</title>
    <author><first>Jungyeul</first><last>Park</last></author>
    <author><first>Loic</first><last>Dugast</last></author>
    <author><first>Jeen-Pyo</first><last>Hong</last></author>
    <author><first>Chang-Uk</first><last>Shin</last></author>
    <author><first>Jeong-Won</first><last>Cha</last></author>
    <booktitle>Proceedings of the First Workshop on Curation and Applications of Parallel and Comparable Corpora</booktitle>
    <month>November</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>1&#8211;10</pages>
    <url>http://www.aclweb.org/anthology/W17-5601</url>
    <abstract>We propose a novel method to bootstrap the construction of parallel corpora for
	new pairs of structurally different languages.
	We do so by combining the use of a pivot language and self-training. 
	A pivot language enables the use of existing translation models to bootstrap
	the alignment and a self-training procedure enables to achieve better
	alignment, both at the document and sentence level. 
	We also propose several evaluation methods for the resulting alignment.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>park-EtAl:2017:Cupral</bibkey>
  </paper>

  <paper id="5602">
    <title>MultiNews: A Web collection of an Aligned Multimodal and Multilingual Corpus</title>
    <author><first>Haithem</first><last>Afli</last></author>
    <author><first>Pintu</first><last>Lohar</last></author>
    <author><first>Andy</first><last>Way</last></author>
    <booktitle>Proceedings of the First Workshop on Curation and Applications of Parallel and Comparable Corpora</booktitle>
    <month>November</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>11&#8211;15</pages>
    <url>http://www.aclweb.org/anthology/W17-5602</url>
    <abstract>Integrating Natural Language Processing (NLP) and computer vision is a
	promising effort.
	However, the applicability of these methods directly depends on the
	availability 
	of a specific multimodal data that includes images and texts.
	In this paper, we present a collection of a Multimodal corpus of comparable
	texts and their images in 9 languages
	from the web news articles of Euronews website.
	This corpus has found widespread use in the NLP community in Multilingual and
	multimodal tasks.
	Here, we focus on its acquisition of the images and text data and their
	multilingual alignment.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>afli-lohar-way:2017:Cupral</bibkey>
  </paper>

  <paper id="5603">
    <title>Learning Phrase Embeddings from Paraphrases with GRUs</title>
    <author><first>Zhihao</first><last>Zhou</last></author>
    <author><first>Lifu</first><last>Huang</last></author>
    <author><first>Heng</first><last>Ji</last></author>
    <booktitle>Proceedings of the First Workshop on Curation and Applications of Parallel and Comparable Corpora</booktitle>
    <month>November</month>
    <year>2017</year>
    <address>Taipei, Taiwan</address>
    <publisher>Asian Federation of Natural Language Processing</publisher>
    <pages>16&#8211;23</pages>
    <url>http://www.aclweb.org/anthology/W17-5603</url>
    <abstract>Learning phrase representations has been widely explored in many Natural
	Language Processing tasks (e.g., Sentiment Analysis, Machine Translation) and
	has shown promising improvements. Previous studies either learn
	non-compositional phrase representations with general word embedding learning
	techniques or learn compositional phrase representations based on syntactic
	structures, which either require huge amounts of human annotations or cannot be
	easily generalized to all phrases. In this work, we propose to take advantage
	of large-scaled paraphrase database and present a pairwise-GRU framework to
	generate compositional phrase representations. Our framework can be re-used to
	generate representations for any phrases. Experimental results show that our
	framework achieves state-of-the-art results on several phrase similarity tasks.</abstract>
    <bibtype>inproceedings</bibtype>
    <bibkey>zhou-huang-ji:2017:Cupral</bibkey>
  </paper>

</volume>

