@inproceedings{keskar-etal-2020-thieves,
    title = "The Thieves on Sesame Street are Polyglots - Extracting Multilingual Models from Monolingual {API}s",
    author = "Keskar, Nitish Shirish and
      McCann, Bryan and
      Xiong, Caiming and
      Socher, Richard",
    editor = "Webber, Bonnie and
      Cohn, Trevor and
      He, Yulan and
      Liu, Yang",
    booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
    month = nov,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.emnlp-main.501",
    doi = "10.18653/v1/2020.emnlp-main.501",
    pages = "6203--6207",
    abstract = "Pre-training in natural language processing makes it easier for an adversary with only query access to a victim model to reconstruct a local copy of the victim by training with gibberish input data paired with the victim{'}s labels for that data. We discover that this extraction process extends to local copies initialized from a pre-trained, multilingual model while the victim remains monolingual. The extracted model learns the task from the monolingual victim, but it generalizes far better than the victim to several other languages. This is done without ever showing the multilingual, extracted model a well-formed input in any of the languages for the target task. We also demonstrate that a few real examples can greatly improve performance, and we analyze how these results shed light on how such extraction methods succeed.",
}
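The abstract describes a simple extraction loop: sample gibberish inputs, query the victim API for labels, and train a local copy on those (input, label) pairs. Below is a minimal, self-contained PyTorch sketch of that loop, not the paper's actual setup: the `TinyClassifier` stand-ins, vocabulary size, and hyperparameters are all hypothetical, and in the paper the extracted model would be initialized from a pre-trained multilingual checkpoint (while the victim stays monolingual) rather than trained from scratch.

```python
# Hypothetical sketch of model extraction via gibberish queries.
import torch
import torch.nn as nn

VOCAB, DIM, CLASSES, SEQ = 1000, 64, 2, 16  # toy sizes, not the paper's

class TinyClassifier(nn.Module):
    """Stand-in for both the victim API and the extracted copy."""
    def __init__(self):
        super().__init__()
        self.emb = nn.Embedding(VOCAB, DIM)
        self.head = nn.Linear(DIM, CLASSES)

    def forward(self, ids):                      # ids: (batch, seq)
        return self.head(self.emb(ids).mean(dim=1))

victim = TinyClassifier().eval()                  # black box: we only see its labels
extracted = TinyClassifier()                      # in the paper: a multilingual pre-trained model
opt = torch.optim.Adam(extracted.parameters(), lr=1e-3)

for step in range(100):
    # Gibberish queries: random token sequences, never a well-formed input.
    gibberish = torch.randint(0, VOCAB, (32, SEQ))
    with torch.no_grad():
        labels = victim(gibberish).argmax(dim=-1)  # query access only: hard labels
    loss = nn.functional.cross_entropy(extracted(gibberish), labels)
    opt.zero_grad()
    loss.backward()
    opt.step()
```

Under this query-access threat model, the attacker never touches the victim's weights or training data; the paper's finding is that when `extracted` starts from a multilingual checkpoint, the copy generalizes to languages the monolingual victim cannot handle.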
Markdown (Informal)
[The Thieves on Sesame Street are Polyglots - Extracting Multilingual Models from Monolingual APIs](https://aclanthology.org/2020.emnlp-main.501) (Keskar et al., EMNLP 2020)
ACL
Nitish Shirish Keskar, Bryan McCann, Caiming Xiong, and Richard Socher. 2020. [The Thieves on Sesame Street are Polyglots - Extracting Multilingual Models from Monolingual APIs](https://aclanthology.org/2020.emnlp-main.501). In *Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)*, pages 6203–6207, Online. Association for Computational Linguistics.