% BibTeX record for ACL Anthology paper W17-2619 (text outside an entry is ignored by BibTeX).
@inproceedings{schwenk-douze-2017-learning,
    title     = {Learning Joint Multilingual Sentence Representations with Neural Machine Translation},
    author    = {Schwenk, Holger and
                 Douze, Matthijs},
    editor    = {Blunsom, Phil and
                 Bordes, Antoine and
                 Cho, Kyunghyun and
                 Cohen, Shay and
                 Dyer, Chris and
                 Grefenstette, Edward and
                 Hermann, Karl Moritz and
                 Rimell, Laura and
                 Weston, Jason and
                 Yih, Scott},
    booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
    month     = aug,
    year      = {2017},
    address   = {Vancouver, Canada},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W17-2619/},
    doi       = {10.18653/v1/W17-2619},
    pages     = {157--167},
    abstract  = {In this paper, we use the framework of neural machine translation to learn joint sentence representations across six very different languages. Our aim is that a representation which is independent of the language, is likely to capture the underlying semantics. We define a new cross-lingual similarity measure, compare up to 1.4M sentence representations and study the characteristics of close sentences. We provide experimental evidence that sentences that are close in embedding space are indeed semantically highly related, but often have quite different structure and syntax. These relations also hold when comparing sentences in different languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 record for the paper with citation key schwenk-douze-2017-learning. -->
<mods ID="schwenk-douze-2017-learning">
    <titleInfo>
        <title>Learning Joint Multilingual Sentence Representations with Neural Machine Translation</title>
    </titleInfo>
    <!-- Paper authors, one <name> element per person, each with role "author". -->
    <name type="personal">
        <namePart type="given">Holger</namePart>
        <namePart type="family">Schwenk</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Matthijs</namePart>
        <namePart type="family">Douze</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <!-- Issue date of the paper, given as year and month. -->
    <originInfo>
        <dateIssued>2017-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and genre. -->
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2nd Workshop on Representation Learning for NLP</title>
        </titleInfo>
        <!-- Volume editors, one <name> element per person, each with role "editor". -->
        <name type="personal">
            <namePart type="given">Phil</namePart>
            <namePart type="family">Blunsom</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Antoine</namePart>
            <namePart type="family">Bordes</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Kyunghyun</namePart>
            <namePart type="family">Cho</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Shay</namePart>
            <namePart type="family">Cohen</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Chris</namePart>
            <namePart type="family">Dyer</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Edward</namePart>
            <namePart type="family">Grefenstette</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <!-- This editor's given name is split across two namePart elements. -->
        <name type="personal">
            <namePart type="given">Karl</namePart>
            <namePart type="given">Moritz</namePart>
            <namePart type="family">Hermann</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Laura</namePart>
            <namePart type="family">Rimell</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jason</namePart>
            <namePart type="family">Weston</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Scott</namePart>
            <namePart type="family">Yih</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <!-- Publisher and place recorded for the host volume. -->
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Vancouver, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we use the framework of neural machine translation to learn joint sentence representations across six very different languages. Our aim is that a representation which is independent of the language, is likely to capture the underlying semantics. We define a new cross-lingual similarity measure, compare up to 1.4M sentence representations and study the characteristics of close sentences. We provide experimental evidence that sentences that are close in embedding space are indeed semantically highly related, but often have quite different structure and syntax. These relations also hold when comparing sentences in different languages.</abstract>
    <!-- Identifiers for this paper (citation key and DOI) and its URL. -->
    <identifier type="citekey">schwenk-douze-2017-learning</identifier>
    <identifier type="doi">10.18653/v1/W17-2619</identifier>
    <location>
        <url>https://aclanthology.org/W17-2619/</url>
    </location>
    <!-- Date and page extent (start and end page) of this part. -->
    <part>
        <date>2017-08</date>
        <extent unit="page">
            <start>157</start>
            <end>167</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Joint Multilingual Sentence Representations with Neural Machine Translation
%A Schwenk, Holger
%A Douze, Matthijs
%Y Blunsom, Phil
%Y Bordes, Antoine
%Y Cho, Kyunghyun
%Y Cohen, Shay
%Y Dyer, Chris
%Y Grefenstette, Edward
%Y Hermann, Karl Moritz
%Y Rimell, Laura
%Y Weston, Jason
%Y Yih, Scott
%S Proceedings of the 2nd Workshop on Representation Learning for NLP
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F schwenk-douze-2017-learning
%X In this paper, we use the framework of neural machine translation to learn joint sentence representations across six very different languages. Our aim is that a representation which is independent of the language, is likely to capture the underlying semantics. We define a new cross-lingual similarity measure, compare up to 1.4M sentence representations and study the characteristics of close sentences. We provide experimental evidence that sentences that are close in embedding space are indeed semantically highly related, but often have quite different structure and syntax. These relations also hold when comparing sentences in different languages.
%R 10.18653/v1/W17-2619
%U https://aclanthology.org/W17-2619/
%U https://doi.org/10.18653/v1/W17-2619
%P 157-167
Markdown (Informal)
[Learning Joint Multilingual Sentence Representations with Neural Machine Translation](https://aclanthology.org/W17-2619/) (Schwenk & Douze, RepL4NLP 2017)
ACL