% BibTeX record for the OodGAN paper (NAACL 2021 proceedings, Industry Papers volume).
% The model name is brace-protected as a whole word so sentence-casing styles keep "OodGAN" intact.
@inproceedings{marek-etal-2021-oodgan,
  title     = {{OodGAN}: Generative Adversarial Network for Out-of-Domain Data Generation},
  author    = {Marek, Petr and
               Naik, Vishal Ishwar and
               Goyal, Anuj and
               Auvray, Vincent},
  editor    = {Kim, Young-bum and
               Li, Yunyao and
               Rambow, Owen},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers},
  month     = jun,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.naacl-industry.30},
  doi       = {10.18653/v1/2021.naacl-industry.30},
  pages     = {238--245},
  abstract  = {Detecting an Out-of-Domain (OOD) utterance is crucial for a robust dialog system. Most dialog systems are trained on a pool of annotated OOD data to achieve this goal. However, collecting the annotated OOD data for a given domain is an expensive process. To mitigate this issue, previous works have proposed generative adversarial networks (GAN) based models to generate OOD data for a given domain automatically. However, these proposed models do not work directly with the text. They work with the text{'}s latent space instead, enforcing these models to include components responsible for encoding text into latent space and decoding it back, such as auto-encoder. These components increase the model complexity, making it difficult to train. We propose OodGAN, a sequential generative adversarial network (SeqGAN) based model for OOD data generation. Our proposed model works directly on the text and hence eliminates the need to include an auto-encoder. OOD data generated using OodGAN model outperforms state-of-the-art in OOD detection metrics for ROSTD (67{\%} relative improvement in FPR 0.95) and OSQ datasets (28{\%} relative improvement in FPR 0.95)},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey marek-etal-2021-oodgan: a paper plus its host proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="marek-etal-2021-oodgan">
    <titleInfo>
      <title>OodGAN: Generative Adversarial Network for Out-of-Domain Data Generation</title>
    </titleInfo>
    <!-- Paper authors (roleTerm: author). -->
    <name type="personal">
      <namePart type="given">Petr</namePart>
      <namePart type="family">Marek</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vishal</namePart>
      <namePart type="given">Ishwar</namePart>
      <namePart type="family">Naik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anuj</namePart>
      <namePart type="family">Goyal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vincent</namePart>
      <namePart type="family">Auvray</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Young-bum</namePart>
        <namePart type="family">Kim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yunyao</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Detecting an Out-of-Domain (OOD) utterance is crucial for a robust dialog system. Most dialog systems are trained on a pool of annotated OOD data to achieve this goal. However, collecting the annotated OOD data for a given domain is an expensive process. To mitigate this issue, previous works have proposed generative adversarial networks (GAN) based models to generate OOD data for a given domain automatically. However, these proposed models do not work directly with the text. They work with the text’s latent space instead, enforcing these models to include components responsible for encoding text into latent space and decoding it back, such as auto-encoder. These components increase the model complexity, making it difficult to train. We propose OodGAN, a sequential generative adversarial network (SeqGAN) based model for OOD data generation. Our proposed model works directly on the text and hence eliminates the need to include an auto-encoder. OOD data generated using OodGAN model outperforms state-of-the-art in OOD detection metrics for ROSTD (67% relative improvement in FPR 0.95) and OSQ datasets (28% relative improvement in FPR 0.95)</abstract>
    <!-- Identifiers and canonical landing URL. -->
    <identifier type="citekey">marek-etal-2021-oodgan</identifier>
    <identifier type="doi">10.18653/v1/2021.naacl-industry.30</identifier>
    <location>
      <url>https://aclanthology.org/2021.naacl-industry.30</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2021-06</date>
      <extent unit="page">
        <start>238</start>
        <end>245</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T OodGAN: Generative Adversarial Network for Out-of-Domain Data Generation
%A Marek, Petr
%A Naik, Vishal Ishwar
%A Goyal, Anuj
%A Auvray, Vincent
%Y Kim, Young-bum
%Y Li, Yunyao
%Y Rambow, Owen
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F marek-etal-2021-oodgan
%X Detecting an Out-of-Domain (OOD) utterance is crucial for a robust dialog system. Most dialog systems are trained on a pool of annotated OOD data to achieve this goal. However, collecting the annotated OOD data for a given domain is an expensive process. To mitigate this issue, previous works have proposed generative adversarial networks (GAN) based models to generate OOD data for a given domain automatically. However, these proposed models do not work directly with the text. They work with the text’s latent space instead, enforcing these models to include components responsible for encoding text into latent space and decoding it back, such as auto-encoder. These components increase the model complexity, making it difficult to train. We propose OodGAN, a sequential generative adversarial network (SeqGAN) based model for OOD data generation. Our proposed model works directly on the text and hence eliminates the need to include an auto-encoder. OOD data generated using OodGAN model outperforms state-of-the-art in OOD detection metrics for ROSTD (67% relative improvement in FPR 0.95) and OSQ datasets (28% relative improvement in FPR 0.95)
%R 10.18653/v1/2021.naacl-industry.30
%U https://aclanthology.org/2021.naacl-industry.30
%U https://doi.org/10.18653/v1/2021.naacl-industry.30
%P 238-245
Markdown (Informal)
[OodGAN: Generative Adversarial Network for Out-of-Domain Data Generation](https://aclanthology.org/2021.naacl-industry.30) (Marek et al., NAACL 2021)
ACL
- Petr Marek, Vishal Ishwar Naik, Anuj Goyal, and Vincent Auvray. 2021. OodGAN: Generative Adversarial Network for Out-of-Domain Data Generation. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Industry Papers, pages 238–245, Online. Association for Computational Linguistics.