% NAACL 2019 Student Research Workshop paper (ACL Anthology ID N19-3014).
@inproceedings{kim-etal-2019-data,
  author    = {Kim, Hwa-Yeon and
               Roh, Yoon-Hyung and
               Kim, Young-Kil},
  title     = {Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding},
  editor    = {Kar, Sudipta and
               Nadeem, Farah and
               Burdick, Laura and
               Durrett, Greg and
               Han, Na-Rae},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Student Research Workshop},
  year      = {2019},
  month     = jun,
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages     = {97--102},
  doi       = {10.18653/v1/N19-3014},
  url       = {https://aclanthology.org/N19-3014/},
  abstract  = {One of the main challenges in Spoken Language Understanding (SLU) is dealing with {\textquoteleft}open-vocabulary' slots. Recently, SLU models based on neural network were proposed, but it is still difficult to recognize the slots of unknown words or {\textquoteleft}open-vocabulary' slots because of the high cost of creating a manually tagged SLU dataset. This paper proposes data noising, which reflects the characteristics of the {\textquoteleft}open-vocabulary' slots, for data augmentation. We applied it to an attention based bi-directional recurrent neural network (Liu and Lane, 2016) and experimented with three datasets: Airline Travel Information System (ATIS), Snips, and MIT-Restaurant. We achieved performance improvements of up to 0.57{\%} and 3.25 in intent prediction (accuracy) and slot filling (f1-score), respectively. Our method is advantageous because it does not require additional memory and it can be applied simultaneously with the training process of the model.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one bibliographic record (citekey kim-etal-2019-data). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-etal-2019-data">
<titleInfo>
<title>Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding</title>
</titleInfo>
<!-- Paper authors: three personal names, each with the marcrelator role "author". -->
<name type="personal">
<namePart type="given">Hwa-Yeon</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoon-Hyung</namePart>
<namePart type="family">Roh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Young-Kil</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<!-- Issue date of the paper, given as year-month. -->
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing the paper, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farah</namePart>
<namePart type="family">Nadeem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Burdick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Durrett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Na-Rae</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the main challenges in Spoken Language Understanding (SLU) is dealing with ‘open-vocabulary’ slots. Recently, SLU models based on neural network were proposed, but it is still difficult to recognize the slots of unknown words or ‘open-vocabulary’ slots because of the high cost of creating a manually tagged SLU dataset. This paper proposes data noising, which reflects the characteristics of the ‘open-vocabulary’ slots, for data augmentation. We applied it to an attention based bi-directional recurrent neural network (Liu and Lane, 2016) and experimented with three datasets: Airline Travel Information System (ATIS), Snips, and MIT-Restaurant. We achieved performance improvements of up to 0.57% and 3.25 in intent prediction (accuracy) and slot filling (f1-score), respectively. Our method is advantageous because it does not require additional memory and it can be applied simultaneously with the training process of the model.</abstract>
<!-- Identifiers for the record (citation key and DOI) followed by its URL. -->
<identifier type="citekey">kim-etal-2019-data</identifier>
<identifier type="doi">10.18653/v1/N19-3014</identifier>
<location>
<url>https://aclanthology.org/N19-3014/</url>
</location>
<!-- Part details: date and the start/end pages of the paper. -->
<part>
<date>2019-06</date>
<extent unit="page">
<start>97</start>
<end>102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding
%A Kim, Hwa-Yeon
%A Roh, Yoon-Hyung
%A Kim, Young-Kil
%Y Kar, Sudipta
%Y Nadeem, Farah
%Y Burdick, Laura
%Y Durrett, Greg
%Y Han, Na-Rae
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F kim-etal-2019-data
%X One of the main challenges in Spoken Language Understanding (SLU) is dealing with ‘open-vocabulary’ slots. Recently, SLU models based on neural network were proposed, but it is still difficult to recognize the slots of unknown words or ‘open-vocabulary’ slots because of the high cost of creating a manually tagged SLU dataset. This paper proposes data noising, which reflects the characteristics of the ‘open-vocabulary’ slots, for data augmentation. We applied it to an attention based bi-directional recurrent neural network (Liu and Lane, 2016) and experimented with three datasets: Airline Travel Information System (ATIS), Snips, and MIT-Restaurant. We achieved performance improvements of up to 0.57% and 3.25 in intent prediction (accuracy) and slot filling (f1-score), respectively. Our method is advantageous because it does not require additional memory and it can be applied simultaneously with the training process of the model.
%R 10.18653/v1/N19-3014
%U https://aclanthology.org/N19-3014/
%U https://doi.org/10.18653/v1/N19-3014
%P 97-102
Markdown (Informal)
[Data Augmentation by Data Noising for Open-vocabulary Slots in Spoken Language Understanding](https://aclanthology.org/N19-3014/) (Kim et al., NAACL 2019)
ACL