@inproceedings{xu-etal-2020-data,
title = "Data Augmentation for Multiclass Utterance Classification {--} A Systematic Study",
author = "Xu, Binxia and
Qiu, Siyuan and
Zhang, Jie and
Wang, Yafang and
Shen, Xiaoyu and
de Melo, Gerard",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.479",
doi = "10.18653/v1/2020.coling-main.479",
pages = "5494--5506",
abstract = "Utterance classification is a key component in many conversational systems. However, classifying real-world user utterances is challenging, as people may express their ideas and thoughts in manifold ways, and the amount of training data for some categories may be fairly limited, resulting in imbalanced data distributions. To alleviate these issues, we conduct a comprehensive survey regarding data augmentation approaches for text classification, including simple random resampling, word-level transformations, and neural text generation to cope with imbalanced data. Our experiments focus on multi-class datasets with a large number of data samples, which has not been systematically studied in previous work. The results show that the effectiveness of different data augmentation schemes depends on the nature of the dataset under consideration.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2020-data">
<titleInfo>
<title>Data Augmentation for Multiclass Utterance Classification – A Systematic Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Binxia</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyuan</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yafang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyu</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">de Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Utterance classification is a key component in many conversational systems. However, classifying real-world user utterances is challenging, as people may express their ideas and thoughts in manifold ways, and the amount of training data for some categories may be fairly limited, resulting in imbalanced data distributions. To alleviate these issues, we conduct a comprehensive survey regarding data augmentation approaches for text classification, including simple random resampling, word-level transformations, and neural text generation to cope with imbalanced data. Our experiments focus on multi-class datasets with a large number of data samples, which has not been systematically studied in previous work. The results show that the effectiveness of different data augmentation schemes depends on the nature of the dataset under consideration.</abstract>
<identifier type="citekey">xu-etal-2020-data</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.479</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.479</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>5494</start>
<end>5506</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation for Multiclass Utterance Classification – A Systematic Study
%A Xu, Binxia
%A Qiu, Siyuan
%A Zhang, Jie
%A Wang, Yafang
%A Shen, Xiaoyu
%A de Melo, Gerard
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F xu-etal-2020-data
%X Utterance classification is a key component in many conversational systems. However, classifying real-world user utterances is challenging, as people may express their ideas and thoughts in manifold ways, and the amount of training data for some categories may be fairly limited, resulting in imbalanced data distributions. To alleviate these issues, we conduct a comprehensive survey regarding data augmentation approaches for text classification, including simple random resampling, word-level transformations, and neural text generation to cope with imbalanced data. Our experiments focus on multi-class datasets with a large number of data samples, which has not been systematically studied in previous work. The results show that the effectiveness of different data augmentation schemes depends on the nature of the dataset under consideration.
%R 10.18653/v1/2020.coling-main.479
%U https://aclanthology.org/2020.coling-main.479
%U https://doi.org/10.18653/v1/2020.coling-main.479
%P 5494-5506
Markdown (Informal)
[Data Augmentation for Multiclass Utterance Classification – A Systematic Study](https://aclanthology.org/2020.coling-main.479) (Xu et al., COLING 2020)
ACL