@inproceedings{abujabal-etal-2021-continuous,
  author    = {Abujabal, Abdalghani and
               Delli Bovi, Claudio and
               Ryu, Sungho and
               Gojayev, Turan and
               Triefenbach, Fabian and
               Versley, Yannick},
  title     = {Continuous Model Improvement for Language Understanding with Machine Translation},
  editor    = {Kim, Young-bum and
               Li, Yunyao and
               Rambow, Owen},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
  year      = {2021},
  month     = jun,
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {56--62},
  doi       = {10.18653/v1/2021.naacl-industry.8},
  url       = {https://aclanthology.org/2021.naacl-industry.8},
  abstract  = {Scaling conversational personal assistants to a multitude of languages puts high demands on collecting and labelling data, a setting in which cross-lingual learning techniques can help to reconcile the need for well-performing Natural Language Understanding (NLU) with a desideratum to support many languages without incurring unacceptable cost. In this work, we show that automatically annotating unlabeled utterances using Machine Translation in an offline fashion and adding them to the training data can improve performance for existing NLU features for low-resource languages, where a straightforward translate-test approach as considered in existing literature would fail the latency requirements of a live environment. We demonstrate the effectiveness of our method with intrinsic and extrinsic evaluation using a real-world commercial dialog system in German. Beyond an intrinsic evaluation, where 56{\%} of the resulting automatically labeled utterances had a perfect match with ground-truth labels, we see significant performance improvements in an extrinsic evaluation settings when manual labeled data is available in small quantities.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey abujabal-etal-2021-continuous -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abujabal-etal-2021-continuous">
    <titleInfo>
      <title>Continuous Model Improvement for Language Understanding with Machine Translation</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Abdalghani</namePart>
      <namePart type="family">Abujabal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Claudio</namePart>
      <namePart type="family">Delli Bovi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sungho</namePart>
      <namePart type="family">Ryu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Turan</namePart>
      <namePart type="family">Gojayev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fabian</namePart>
      <namePart type="family">Triefenbach</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yannick</namePart>
      <namePart type="family">Versley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Young-bum</namePart>
        <namePart type="family">Kim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yunyao</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Scaling conversational personal assistants to a multitude of languages puts high demands on collecting and labelling data, a setting in which cross-lingual learning techniques can help to reconcile the need for well-performing Natural Language Understanding (NLU) with a desideratum to support many languages without incurring unacceptable cost. In this work, we show that automatically annotating unlabeled utterances using Machine Translation in an offline fashion and adding them to the training data can improve performance for existing NLU features for low-resource languages, where a straightforward translate-test approach as considered in existing literature would fail the latency requirements of a live environment. We demonstrate the effectiveness of our method with intrinsic and extrinsic evaluation using a real-world commercial dialog system in German. Beyond an intrinsic evaluation, where 56% of the resulting automatically labeled utterances had a perfect match with ground-truth labels, we see significant performance improvements in an extrinsic evaluation settings when manual labeled data is available in small quantities.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">abujabal-etal-2021-continuous</identifier>
    <identifier type="doi">10.18653/v1/2021.naacl-industry.8</identifier>
    <location>
      <url>https://aclanthology.org/2021.naacl-industry.8</url>
    </location>
    <!-- Page range within the host volume -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>56</start>
        <end>62</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Continuous Model Improvement for Language Understanding with Machine Translation
%A Abujabal, Abdalghani
%A Delli Bovi, Claudio
%A Ryu, Sungho
%A Gojayev, Turan
%A Triefenbach, Fabian
%A Versley, Yannick
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F abujabal-etal-2021-continuous
%X Scaling conversational personal assistants to a multitude of languages puts high demands on collecting and labelling data, a setting in which cross-lingual learning techniques can help to reconcile the need for well-performing Natural Language Understanding (NLU) with a desideratum to support many languages without incurring unacceptable cost. In this work, we show that automatically annotating unlabeled utterances using Machine Translation in an offline fashion and adding them to the training data can improve performance for existing NLU features for low-resource languages, where a straightforward translate-test approach as considered in existing literature would fail the latency requirements of a live environment. We demonstrate the effectiveness of our method with intrinsic and extrinsic evaluation using a real-world commercial dialog system in German. Beyond an intrinsic evaluation, where 56% of the resulting automatically labeled utterances had a perfect match with ground-truth labels, we see significant performance improvements in an extrinsic evaluation settings when manual labeled data is available in small quantities.
%R 10.18653/v1/2021.naacl-industry.8
%U https://aclanthology.org/2021.naacl-industry.8
%U https://doi.org/10.18653/v1/2021.naacl-industry.8
%P 56-62
Markdown (Informal)
[Continuous Model Improvement for Language Understanding with Machine Translation](https://aclanthology.org/2021.naacl-industry.8) (Abujabal et al., NAACL 2021)
ACL
- Abdalghani Abujabal, Claudio Delli Bovi, Sungho Ryu, Turan Gojayev, Fabian Triefenbach, and Yannick Versley. 2021. Continuous Model Improvement for Language Understanding with Machine Translation. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pages 56–62, Online. Association for Computational Linguistics.