@inproceedings{yu-etal-2018-multilingual,
title = "Multilingual Seq2seq Training with Similarity Loss for Cross-Lingual Document Classification",
author = "Yu, Katherine and
Li, Haoran and
Oguz, Barlas",
editor = "Augenstein, Isabelle and
Cao, Kris and
He, He and
Hill, Felix and
Gella, Spandana and
Kiros, Jamie and
Mei, Hongyuan and
Misra, Dipendra",
booktitle = "Proceedings of the Third Workshop on Representation Learning for {NLP}",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3023/",
doi = "10.18653/v1/W18-3023",
pages = "175--179",
abstract = "In this paper we continue experiments where neural machine translation training is used to produce joint cross-lingual fixed-dimensional sentence embeddings. In this framework we introduce a simple method of adding a loss to the learning objective which penalizes distance between representations of bilingually aligned sentences. We evaluate cross-lingual transfer using two approaches, cross-lingual similarity search on an aligned corpus (Europarl) and cross-lingual document classification on a recently published benchmark Reuters corpus, and we find the similarity loss significantly improves performance on both. Furthermore, we notice that while our Reuters results are very competitive, our English results are not as competitive, showing room for improvement in the current cross-lingual state-of-the-art. Our results are based on a set of 6 European languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yu-etal-2018-multilingual">
<titleInfo>
<title>Multilingual Seq2seq Training with Similarity Loss for Cross-Lingual Document Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haoran</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barlas</namePart>
<namePart type="family">Oguz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Representation Learning for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kris</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">He</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Hill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spandana</namePart>
<namePart type="family">Gella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jamie</namePart>
<namePart type="family">Kiros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongyuan</namePart>
<namePart type="family">Mei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipendra</namePart>
<namePart type="family">Misra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we continue experiments where neural machine translation training is used to produce joint cross-lingual fixed-dimensional sentence embeddings. In this framework we introduce a simple method of adding a loss to the learning objective which penalizes distance between representations of bilingually aligned sentences. We evaluate cross-lingual transfer using two approaches, cross-lingual similarity search on an aligned corpus (Europarl) and cross-lingual document classification on a recently published benchmark Reuters corpus, and we find the similarity loss significantly improves performance on both. Furthermore, we notice that while our Reuters results are very competitive, our English results are not as competitive, showing room for improvement in the current cross-lingual state-of-the-art. Our results are based on a set of 6 European languages.</abstract>
<identifier type="citekey">yu-etal-2018-multilingual</identifier>
<identifier type="doi">10.18653/v1/W18-3023</identifier>
<location>
<url>https://aclanthology.org/W18-3023/</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>175</start>
<end>179</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Seq2seq Training with Similarity Loss for Cross-Lingual Document Classification
%A Yu, Katherine
%A Li, Haoran
%A Oguz, Barlas
%Y Augenstein, Isabelle
%Y Cao, Kris
%Y He, He
%Y Hill, Felix
%Y Gella, Spandana
%Y Kiros, Jamie
%Y Mei, Hongyuan
%Y Misra, Dipendra
%S Proceedings of the Third Workshop on Representation Learning for NLP
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F yu-etal-2018-multilingual
%X In this paper we continue experiments where neural machine translation training is used to produce joint cross-lingual fixed-dimensional sentence embeddings. In this framework we introduce a simple method of adding a loss to the learning objective which penalizes distance between representations of bilingually aligned sentences. We evaluate cross-lingual transfer using two approaches, cross-lingual similarity search on an aligned corpus (Europarl) and cross-lingual document classification on a recently published benchmark Reuters corpus, and we find the similarity loss significantly improves performance on both. Furthermore, we notice that while our Reuters results are very competitive, our English results are not as competitive, showing room for improvement in the current cross-lingual state-of-the-art. Our results are based on a set of 6 European languages.
%R 10.18653/v1/W18-3023
%U https://aclanthology.org/W18-3023/
%U https://doi.org/10.18653/v1/W18-3023
%P 175-179
Markdown (Informal)
[Multilingual Seq2seq Training with Similarity Loss for Cross-Lingual Document Classification](https://aclanthology.org/W18-3023/) (Yu et al., RepL4NLP 2018)
ACL