@inproceedings{hueser-etal-2023-sharing,
title = "Sharing Encoder Representations across Languages, Domains and Tasks in Large-Scale Spoken Language Understanding",
author = "Hueser, Jonathan and
Gaspers, Judith and
Gueudre, Thomas and
Prakash, Chandana and
Cao, Jin and
Sorokin, Daniil and
Do, Quynh and
Anastassacos, Nicolas and
Falke, Tobias and
Gojayev, Turan",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.43/",
doi = "10.18653/v1/2023.acl-industry.43",
pages = "447--456",
abstract = "Leveraging representations from pre-trained transformer-based encoders achieves state-of-the-art performance on numerous NLP tasks. Larger encoders can improve accuracy for spoken language understanding (SLU) but are challenging to use given the inference latency constraints of online systems (especially on CPU machines).We evaluate using a larger 170M parameter BERT encoder that shares representations across languages, domains and tasks for SLU compared to using smaller 17M parameter BERT encoders with language-, domain- and task-decoupled finetuning.Running inference with a larger shared encoder on GPU is latency neutral and reduces infrastructure cost compared to running inference for decoupled smaller encoders on CPU machines. The larger shared encoder reduces semantic error rates by 4.62{\%} for test sets representing user requests to voice-controlled devices and 5.79{\%} on the tail of the test sets on average across four languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hueser-etal-2023-sharing">
<titleInfo>
<title>Sharing Encoder Representations across Languages, Domains and Tasks in Large-Scale Spoken Language Understanding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">Hueser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Judith</namePart>
<namePart type="family">Gaspers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Gueudre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chandana</namePart>
<namePart type="family">Prakash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quynh</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Anastassacos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tobias</namePart>
<namePart type="family">Falke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Turan</namePart>
<namePart type="family">Gojayev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Leveraging representations from pre-trained transformer-based encoders achieves state-of-the-art performance on numerous NLP tasks. Larger encoders can improve accuracy for spoken language understanding (SLU) but are challenging to use given the inference latency constraints of online systems (especially on CPU machines). We evaluate using a larger 170M parameter BERT encoder that shares representations across languages, domains and tasks for SLU compared to using smaller 17M parameter BERT encoders with language-, domain- and task-decoupled finetuning. Running inference with a larger shared encoder on GPU is latency neutral and reduces infrastructure cost compared to running inference for decoupled smaller encoders on CPU machines. The larger shared encoder reduces semantic error rates by 4.62% for test sets representing user requests to voice-controlled devices and 5.79% on the tail of the test sets on average across four languages.</abstract>
<identifier type="citekey">hueser-etal-2023-sharing</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.43</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.43/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>447</start>
<end>456</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sharing Encoder Representations across Languages, Domains and Tasks in Large-Scale Spoken Language Understanding
%A Hueser, Jonathan
%A Gaspers, Judith
%A Gueudre, Thomas
%A Prakash, Chandana
%A Cao, Jin
%A Sorokin, Daniil
%A Do, Quynh
%A Anastassacos, Nicolas
%A Falke, Tobias
%A Gojayev, Turan
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F hueser-etal-2023-sharing
%X Leveraging representations from pre-trained transformer-based encoders achieves state-of-the-art performance on numerous NLP tasks. Larger encoders can improve accuracy for spoken language understanding (SLU) but are challenging to use given the inference latency constraints of online systems (especially on CPU machines). We evaluate using a larger 170M parameter BERT encoder that shares representations across languages, domains and tasks for SLU compared to using smaller 17M parameter BERT encoders with language-, domain- and task-decoupled finetuning. Running inference with a larger shared encoder on GPU is latency neutral and reduces infrastructure cost compared to running inference for decoupled smaller encoders on CPU machines. The larger shared encoder reduces semantic error rates by 4.62% for test sets representing user requests to voice-controlled devices and 5.79% on the tail of the test sets on average across four languages.
%R 10.18653/v1/2023.acl-industry.43
%U https://aclanthology.org/2023.acl-industry.43/
%U https://doi.org/10.18653/v1/2023.acl-industry.43
%P 447-456
Markdown (Informal)
[Sharing Encoder Representations across Languages, Domains and Tasks in Large-Scale Spoken Language Understanding](https://aclanthology.org/2023.acl-industry.43/) (Hueser et al., ACL 2023)
ACL
Jonathan Hueser, Judith Gaspers, Thomas Gueudre, Chandana Prakash, Jin Cao, Daniil Sorokin, Quynh Do, Nicolas Anastassacos, Tobias Falke, and Turan Gojayev. 2023. Sharing Encoder Representations across Languages, Domains and Tasks in Large-Scale Spoken Language Understanding. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track), pages 447–456, Toronto, Canada. Association for Computational Linguistics.