@inproceedings{agic-vulic-2019-jw300,
title = "{JW}300: A Wide-Coverage Parallel Corpus for Low-Resource Languages",
author = "Agi{\'c}, {\v{Z}}eljko and
Vuli{\'c}, Ivan",
editor = "Korhonen, Anna and
Traum, David and
M{\`a}rquez, Llu{\'\i}s",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P19-1310",
doi = "10.18653/v1/P19-1310",
pages = "3204--3210",
abstract = "Viable cross-lingual transfer critically depends on the availability of parallel texts. Shortage of such resources imposes a development and evaluation bottleneck in multilingual processing. We introduce JW300, a parallel corpus of over 300 languages with around 100 thousand parallel sentences per language pair on average. In this paper, we present the resource and showcase its utility in experiments with cross-lingual word embedding induction and multi-source part-of-speech projection.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="agic-vulic-2019-jw300">
<titleInfo>
<title>JW300: A Wide-Coverage Parallel Corpus for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Željko</namePart>
<namePart type="family">Agić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vulić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Traum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Màrquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Viable cross-lingual transfer critically depends on the availability of parallel texts. Shortage of such resources imposes a development and evaluation bottleneck in multilingual processing. We introduce JW300, a parallel corpus of over 300 languages with around 100 thousand parallel sentences per language pair on average. In this paper, we present the resource and showcase its utility in experiments with cross-lingual word embedding induction and multi-source part-of-speech projection.</abstract>
<identifier type="citekey">agic-vulic-2019-jw300</identifier>
<identifier type="doi">10.18653/v1/P19-1310</identifier>
<location>
<url>https://aclanthology.org/P19-1310</url>
</location>
<part>
<date>2019-07</date>
<extent unit="page">
<start>3204</start>
<end>3210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JW300: A Wide-Coverage Parallel Corpus for Low-Resource Languages
%A Agić, Željko
%A Vulić, Ivan
%Y Korhonen, Anna
%Y Traum, David
%Y Màrquez, Lluís
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F agic-vulic-2019-jw300
%X Viable cross-lingual transfer critically depends on the availability of parallel texts. Shortage of such resources imposes a development and evaluation bottleneck in multilingual processing. We introduce JW300, a parallel corpus of over 300 languages with around 100 thousand parallel sentences per language pair on average. In this paper, we present the resource and showcase its utility in experiments with cross-lingual word embedding induction and multi-source part-of-speech projection.
%R 10.18653/v1/P19-1310
%U https://aclanthology.org/P19-1310
%U https://doi.org/10.18653/v1/P19-1310
%P 3204-3210
Markdown (Informal)
[JW300: A Wide-Coverage Parallel Corpus for Low-Resource Languages](https://aclanthology.org/P19-1310) (Agić & Vulić, ACL 2019)
ACL