@inproceedings{oramas-etal-2021-bootstrapping,
title = "Bootstrapping a Music Voice Assistant with Weak Supervision",
author = "Oramas, Sergio and
Quadrana, Massimo and
Gouyon, Fabien",
editor = "Kim, Young-bum and
Li, Yunyao and
Rambow, Owen",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-industry.7",
doi = "10.18653/v1/2021.naacl-industry.7",
pages = "49--55",
abstract = "One of the first building blocks to create a voice assistant relates to the task of tagging entities or attributes in user queries. This can be particularly challenging when entities are in the tenth of millions, as is the case of e.g. music catalogs. Training slot tagging models at an industrial scale requires large quantities of accurately labeled user queries, which are often hard and costly to gather. On the other hand, voice assistants typically collect plenty of unlabeled queries that often remain unexploited. This paper presents a weakly-supervised methodology to label large amounts of voice query logs, enhanced with a manual filtering step. Our experimental evaluations show that slot tagging models trained on weakly-supervised data outperform models trained on hand-annotated or synthetic data, at a lower cost. Further, manual filtering of weakly-supervised data leads to a very significant reduction in Sentence Error Rate, while allowing us to drastically reduce human curation efforts from weeks to hours, with respect to hand-annotation of queries. The method is applied to successfully bootstrap a slot tagging system for a major music streaming service that currently serves several tens of thousands of daily voice queries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oramas-etal-2021-bootstrapping">
<titleInfo>
<title>Bootstrapping a Music Voice Assistant with Weak Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Oramas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Massimo</namePart>
<namePart type="family">Quadrana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabien</namePart>
<namePart type="family">Gouyon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Young-bum</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunyao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the first building blocks to create a voice assistant relates to the task of tagging entities or attributes in user queries. This can be particularly challenging when entities are in the tenth of millions, as is the case of e.g. music catalogs. Training slot tagging models at an industrial scale requires large quantities of accurately labeled user queries, which are often hard and costly to gather. On the other hand, voice assistants typically collect plenty of unlabeled queries that often remain unexploited. This paper presents a weakly-supervised methodology to label large amounts of voice query logs, enhanced with a manual filtering step. Our experimental evaluations show that slot tagging models trained on weakly-supervised data outperform models trained on hand-annotated or synthetic data, at a lower cost. Further, manual filtering of weakly-supervised data leads to a very significant reduction in Sentence Error Rate, while allowing us to drastically reduce human curation efforts from weeks to hours, with respect to hand-annotation of queries. The method is applied to successfully bootstrap a slot tagging system for a major music streaming service that currently serves several tens of thousands of daily voice queries.</abstract>
<identifier type="citekey">oramas-etal-2021-bootstrapping</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-industry.7</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-industry.7</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>49</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bootstrapping a Music Voice Assistant with Weak Supervision
%A Oramas, Sergio
%A Quadrana, Massimo
%A Gouyon, Fabien
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F oramas-etal-2021-bootstrapping
%X One of the first building blocks to create a voice assistant relates to the task of tagging entities or attributes in user queries. This can be particularly challenging when entities are in the tenth of millions, as is the case of e.g. music catalogs. Training slot tagging models at an industrial scale requires large quantities of accurately labeled user queries, which are often hard and costly to gather. On the other hand, voice assistants typically collect plenty of unlabeled queries that often remain unexploited. This paper presents a weakly-supervised methodology to label large amounts of voice query logs, enhanced with a manual filtering step. Our experimental evaluations show that slot tagging models trained on weakly-supervised data outperform models trained on hand-annotated or synthetic data, at a lower cost. Further, manual filtering of weakly-supervised data leads to a very significant reduction in Sentence Error Rate, while allowing us to drastically reduce human curation efforts from weeks to hours, with respect to hand-annotation of queries. The method is applied to successfully bootstrap a slot tagging system for a major music streaming service that currently serves several tens of thousands of daily voice queries.
%R 10.18653/v1/2021.naacl-industry.7
%U https://aclanthology.org/2021.naacl-industry.7
%U https://doi.org/10.18653/v1/2021.naacl-industry.7
%P 49-55
Markdown (Informal)
[Bootstrapping a Music Voice Assistant with Weak Supervision](https://aclanthology.org/2021.naacl-industry.7) (Oramas et al., NAACL 2021)
ACL
- Sergio Oramas, Massimo Quadrana, and Fabien Gouyon. 2021. Bootstrapping a Music Voice Assistant with Weak Supervision. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pages 49–55, Online. Association for Computational Linguistics.