@inproceedings{grishina-sorokin-2022-local,
title = "Local-to-global learning for iterative training of production {SLU} models on new features",
author = "Grishina, Yulia and
Sorokin, Daniil",
editor = "Loukina, Anastassia and
Gangadharaiah, Rashmi and
Min, Bonan",
booktitle = "Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track",
month = jul,
year = "2022",
address = "Hybrid: Seattle, Washington + Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.naacl-industry.13",
doi = "10.18653/v1/2022.naacl-industry.13",
pages = "103--111",
abstract = "In production SLU systems, new training data becomes available with time so that ML models need to be updated on a regular basis. Specifically, releasing new features adds new classes of data while the old data remains constant. However, retraining the full model each time from scratch is computationally expensive. To address this problem, we propose to consider production releases from the curriculum learning perspective and to adapt the local-to-global learning (LGL) schedule (Cheng et. al, 2019) for a statistical model that starts with fewer output classes and adds more classes with each iteration. We report experiments for the tasks of intent classification and slot filling in the context of a production voice-assistant. First, we apply the original LGL schedule on our data and then adapt LGL to the production setting where the full data is not available at initial training iterations. We demonstrate that our method improves model error rates by 7.3{\%} and saves up to 25{\%} training time for individual iterations.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="grishina-sorokin-2022-local">
<titleInfo>
<title>Local-to-global learning for iterative training of production SLU models on new features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Grishina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Sorokin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anastassia</namePart>
<namePart type="family">Loukina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Gangadharaiah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bonan</namePart>
<namePart type="family">Min</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid: Seattle, Washington + Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In production SLU systems, new training data becomes available with time so that ML models need to be updated on a regular basis. Specifically, releasing new features adds new classes of data while the old data remains constant. However, retraining the full model each time from scratch is computationally expensive. To address this problem, we propose to consider production releases from the curriculum learning perspective and to adapt the local-to-global learning (LGL) schedule (Cheng et al., 2019) for a statistical model that starts with fewer output classes and adds more classes with each iteration. We report experiments for the tasks of intent classification and slot filling in the context of a production voice-assistant. First, we apply the original LGL schedule on our data and then adapt LGL to the production setting where the full data is not available at initial training iterations. We demonstrate that our method improves model error rates by 7.3% and saves up to 25% training time for individual iterations.</abstract>
<identifier type="citekey">grishina-sorokin-2022-local</identifier>
<identifier type="doi">10.18653/v1/2022.naacl-industry.13</identifier>
<location>
<url>https://aclanthology.org/2022.naacl-industry.13</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>103</start>
<end>111</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Local-to-global learning for iterative training of production SLU models on new features
%A Grishina, Yulia
%A Sorokin, Daniil
%Y Loukina, Anastassia
%Y Gangadharaiah, Rashmi
%Y Min, Bonan
%S Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track
%D 2022
%8 July
%I Association for Computational Linguistics
%C Hybrid: Seattle, Washington + Online
%F grishina-sorokin-2022-local
%X In production SLU systems, new training data becomes available with time so that ML models need to be updated on a regular basis. Specifically, releasing new features adds new classes of data while the old data remains constant. However, retraining the full model each time from scratch is computationally expensive. To address this problem, we propose to consider production releases from the curriculum learning perspective and to adapt the local-to-global learning (LGL) schedule (Cheng et al., 2019) for a statistical model that starts with fewer output classes and adds more classes with each iteration. We report experiments for the tasks of intent classification and slot filling in the context of a production voice-assistant. First, we apply the original LGL schedule on our data and then adapt LGL to the production setting where the full data is not available at initial training iterations. We demonstrate that our method improves model error rates by 7.3% and saves up to 25% training time for individual iterations.
%R 10.18653/v1/2022.naacl-industry.13
%U https://aclanthology.org/2022.naacl-industry.13
%U https://doi.org/10.18653/v1/2022.naacl-industry.13
%P 103-111
Markdown (Informal)
[Local-to-global learning for iterative training of production SLU models on new features](https://aclanthology.org/2022.naacl-industry.13) (Grishina & Sorokin, NAACL 2022)
ACL
Yulia Grishina and Daniil Sorokin. 2022. Local-to-global learning for iterative training of production SLU models on new features. In Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Track, pages 103–111, Hybrid: Seattle, Washington + Online. Association for Computational Linguistics.