% ACL Anthology record 2020.starsem-1.5 (conference paper).
@inproceedings{hakimi-parizi-cook-2020-joint,
    title     = {Joint Training for Learning Cross-lingual Embeddings with Sub-word Information without Parallel Corpora},
    author    = {Hakimi Parizi, Ali and Cook, Paul},
    editor    = {Gurevych, Iryna and Apidianaki, Marianna and Faruqui, Manaal},
    booktitle = {Proceedings of the Ninth Joint Conference on Lexical and Computational Semantics},
    month     = dec,
    year      = {2020},
    address   = {Barcelona, Spain (Online)},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2020.starsem-1.5},
    pages     = {39--49},
    abstract  = {In this paper, we propose a novel method for learning cross-lingual word embeddings, that incorporates sub-word information during training, and is able to learn high-quality embeddings from modest amounts of monolingual data and a bilingual lexicon. This method could be particularly well-suited to learning cross-lingual embeddings for lower-resource, morphologically-rich languages, enabling knowledge to be transferred from rich- to lower-resource languages. We evaluate our proposed approach simulating lower-resource languages for bilingual lexicon induction, monolingual word similarity, and document classification. Our results indicate that incorporating sub-word information indeed leads to improvements, and in the case of document classification, performance better than, or on par with, strong benchmark approaches.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey hakimi-parizi-cook-2020-joint. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hakimi-parizi-cook-2020-joint">
    <!-- Paper title -->
    <titleInfo>
      <title>Joint Training for Learning Cross-lingual Embeddings with Sub-word Information without Parallel Corpora</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Ali</namePart>
      <namePart type="family">Hakimi Parizi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paul</namePart>
      <namePart type="family">Cook</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth Joint Conference on Lexical and Computational Semantics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Iryna</namePart>
        <namePart type="family">Gurevych</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Manaal</namePart>
        <namePart type="family">Faruqui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we propose a novel method for learning cross-lingual word embeddings, that incorporates sub-word information during training, and is able to learn high-quality embeddings from modest amounts of monolingual data and a bilingual lexicon. This method could be particularly well-suited to learning cross-lingual embeddings for lower-resource, morphologically-rich languages, enabling knowledge to be transferred from rich- to lower-resource languages. We evaluate our proposed approach simulating lower-resource languages for bilingual lexicon induction, monolingual word similarity, and document classification. Our results indicate that incorporating sub-word information indeed leads to improvements, and in the case of document classification, performance better than, or on par with, strong benchmark approaches.</abstract>
    <!-- Identifiers and location of the paper -->
    <identifier type="citekey">hakimi-parizi-cook-2020-joint</identifier>
    <location>
      <url>https://aclanthology.org/2020.starsem-1.5</url>
    </location>
    <!-- Issue date and page range within the host volume -->
    <part>
      <date>2020-12</date>
      <extent unit="page">
        <start>39</start>
        <end>49</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Joint Training for Learning Cross-lingual Embeddings with Sub-word Information without Parallel Corpora
%A Hakimi Parizi, Ali
%A Cook, Paul
%Y Gurevych, Iryna
%Y Apidianaki, Marianna
%Y Faruqui, Manaal
%S Proceedings of the Ninth Joint Conference on Lexical and Computational Semantics
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F hakimi-parizi-cook-2020-joint
%X In this paper, we propose a novel method for learning cross-lingual word embeddings, that incorporates sub-word information during training, and is able to learn high-quality embeddings from modest amounts of monolingual data and a bilingual lexicon. This method could be particularly well-suited to learning cross-lingual embeddings for lower-resource, morphologically-rich languages, enabling knowledge to be transferred from rich- to lower-resource languages. We evaluate our proposed approach simulating lower-resource languages for bilingual lexicon induction, monolingual word similarity, and document classification. Our results indicate that incorporating sub-word information indeed leads to improvements, and in the case of document classification, performance better than, or on par with, strong benchmark approaches.
%U https://aclanthology.org/2020.starsem-1.5
%P 39-49
Markdown (Informal)
[Joint Training for Learning Cross-lingual Embeddings with Sub-word Information without Parallel Corpora](https://aclanthology.org/2020.starsem-1.5) (Hakimi Parizi & Cook, \*SEM 2020)
ACL