BibTeX
@article{ponti-etal-2021-parameter,
    title = "Parameter Space Factorization for Zero-Shot Learning across Tasks and Languages",
    author = "Ponti, Edoardo M. and
      Vuli{\'c}, Ivan and
      Cotterell, Ryan and
      Parovic, Marinela and
      Reichart, Roi and
      Korhonen, Anna",
    editor = "Roark, Brian and
      Nenkova, Ani",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "9",
    year = "2021",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/2021.tacl-1.25",
    doi = "10.1162/tacl_a_00374",
    pages = "410--428",
    abstract = "Most combinations of NLP tasks and language varieties lack in-domain examples for supervised training because of the paucity of annotated data. How can neural models make sample-efficient generalizations from task{--}language combinations with available data to low-resource ones? In this work, we propose a Bayesian generative model for the space of neural parameters. We assume that this space can be factorized into latent variables for each language and each task. We infer the posteriors over such latent variables based on data from seen task{--}language combinations through variational inference. This enables zero-shot classification on unseen combinations at prediction time. For instance, given training data for named entity recognition (NER) in Vietnamese and for part-of-speech (POS) tagging in Wolof, our model can perform accurate predictions for NER in Wolof. In particular, we experiment with a typologically diverse sample of 33 languages from 4 continents and 11 families, and show that our model yields comparable or better results than state-of-the-art, zero-shot cross-lingual transfer methods. Our code is available at github.com/cambridgeltl/parameter-factorization.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ponti-etal-2021-parameter">
    <titleInfo>
      <title>Parameter Space Factorization for Zero-Shot Learning across Tasks and Languages</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Edoardo</namePart>
      <namePart type="given">M</namePart>
      <namePart type="family">Ponti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ivan</namePart>
      <namePart type="family">Vulić</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ryan</namePart>
      <namePart type="family">Cotterell</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marinela</namePart>
      <namePart type="family">Parovic</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roi</namePart>
      <namePart type="family">Reichart</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anna</namePart>
      <namePart type="family">Korhonen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Most combinations of NLP tasks and language varieties lack in-domain examples for supervised training because of the paucity of annotated data. How can neural models make sample-efficient generalizations from task–language combinations with available data to low-resource ones? In this work, we propose a Bayesian generative model for the space of neural parameters. We assume that this space can be factorized into latent variables for each language and each task. We infer the posteriors over such latent variables based on data from seen task–language combinations through variational inference. This enables zero-shot classification on unseen combinations at prediction time. For instance, given training data for named entity recognition (NER) in Vietnamese and for part-of-speech (POS) tagging in Wolof, our model can perform accurate predictions for NER in Wolof. In particular, we experiment with a typologically diverse sample of 33 languages from 4 continents and 11 families, and show that our model yields comparable or better results than state-of-the-art, zero-shot cross-lingual transfer methods. Our code is available at github.com/cambridgeltl/parameter-factorization.</abstract>
    <identifier type="citekey">ponti-etal-2021-parameter</identifier>
    <identifier type="doi">10.1162/tacl_a_00374</identifier>
    <location>
      <url>https://aclanthology.org/2021.tacl-1.25</url>
    </location>
    <part>
      <date>2021</date>
      <detail type="volume"><number>9</number></detail>
      <extent unit="page">
        <start>410</start>
        <end>428</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Journal Article
%T Parameter Space Factorization for Zero-Shot Learning across Tasks and Languages
%A Ponti, Edoardo M.
%A Vulić, Ivan
%A Cotterell, Ryan
%A Parovic, Marinela
%A Reichart, Roi
%A Korhonen, Anna
%J Transactions of the Association for Computational Linguistics
%D 2021
%V 9
%I MIT Press
%C Cambridge, MA
%F ponti-etal-2021-parameter
%X Most combinations of NLP tasks and language varieties lack in-domain examples for supervised training because of the paucity of annotated data. How can neural models make sample-efficient generalizations from task–language combinations with available data to low-resource ones? In this work, we propose a Bayesian generative model for the space of neural parameters. We assume that this space can be factorized into latent variables for each language and each task. We infer the posteriors over such latent variables based on data from seen task–language combinations through variational inference. This enables zero-shot classification on unseen combinations at prediction time. For instance, given training data for named entity recognition (NER) in Vietnamese and for part-of-speech (POS) tagging in Wolof, our model can perform accurate predictions for NER in Wolof. In particular, we experiment with a typologically diverse sample of 33 languages from 4 continents and 11 families, and show that our model yields comparable or better results than state-of-the-art, zero-shot cross-lingual transfer methods. Our code is available at github.com/cambridgeltl/parameter-factorization.
%R 10.1162/tacl_a_00374
%U https://aclanthology.org/2021.tacl-1.25
%U https://doi.org/10.1162/tacl_a_00374
%P 410-428

Markdown (Informal)
[Parameter Space Factorization for Zero-Shot Learning across Tasks and Languages](https://aclanthology.org/2021.tacl-1.25) (Ponti et al., TACL 2021)

ACL
Edoardo M. Ponti, Ivan Vulić, Ryan Cotterell, Marinela Parovic, Roi Reichart, and Anna Korhonen. 2021. Parameter Space Factorization for Zero-Shot Learning across Tasks and Languages. Transactions of the Association for Computational Linguistics, 9:410–428.