% Zhu and Hauff, Findings of NAACL 2022 (record exported from aclanthology.org).
@inproceedings{zhu-hauff-2022-unsupervised,
title = "Unsupervised Domain Adaptation for Question Generation with Domain Data Selection and Self-training",
author = "Zhu, Peide and
Hauff, Claudia",
editor = "Carpuat, Marine and
de Marneffe, Marie-Catherine and
Meza Ruiz, Ivan Vladimir",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2022",
month = jul,
year = "2022",
address = "Seattle, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-naacl.183",
doi = "10.18653/v1/2022.findings-naacl.183",
pages = "2388--2401",
abstract = "Question generation (QG) approaches based on large neural models require (i) large-scale and (ii) high-quality training data. These two requirements pose difficulties for specific application domains where training data is expensive and difficult to obtain. The trained QG models{'} effectiveness can degrade significantly when they are applied on a different domain due to domain shift. In this paper, we explore an \textit{unsupervised domain adaptation} approach to combat the lack of training data and domain shift issue with domain data selection and self-training. We first present a novel answer-aware strategy for domain data selection to select data with the most similarity to a new domain. The selected data are then used as pseudo-in-domain data to retrain the QG model. We then present generation confidence guided self-training with two generation confidence modeling methods (i) generated questions{'} perplexity and (ii) the fluency score. We test our approaches on three large public datasets with different domain similarities, using a transformer-based pre-trained QG model. The results show that our proposed approaches outperform the baselines, and show the viability of unsupervised domain adaptation with answer-aware data selection and self-training on the QG task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-hauff-2022-unsupervised">
<titleInfo>
<title>Unsupervised Domain Adaptation for Question Generation with Domain Data Selection and Self-training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peide</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Hauff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Question generation (QG) approaches based on large neural models require (i) large-scale and (ii) high-quality training data. These two requirements pose difficulties for specific application domains where training data is expensive and difficult to obtain. The trained QG models’ effectiveness can degrade significantly when they are applied on a different domain due to domain shift. In this paper, we explore an unsupervised domain adaptation approach to combat the lack of training data and domain shift issue with domain data selection and self-training. We first present a novel answer-aware strategy for domain data selection to select data with the most similarity to a new domain. The selected data are then used as pseudo-in-domain data to retrain the QG model. We then present generation confidence guided self-training with two generation confidence modeling methods (i) generated questions’ perplexity and (ii) the fluency score. We test our approaches on three large public datasets with different domain similarities, using a transformer-based pre-trained QG model. The results show that our proposed approaches outperform the baselines, and show the viability of unsupervised domain adaptation with answer-aware data selection and self-training on the QG task.</abstract>
<identifier type="citekey">zhu-hauff-2022-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2022.findings-naacl.183</identifier>
<location>
<url>https://aclanthology.org/2022.findings-naacl.183</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>2388</start>
<end>2401</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Domain Adaptation for Question Generation with Domain Data Selection and Self-training
%A Zhu, Peide
%A Hauff, Claudia
%Y Carpuat, Marine
%Y de Marneffe, Marie-Catherine
%Y Meza Ruiz, Ivan Vladimir
%S Findings of the Association for Computational Linguistics: NAACL 2022
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, United States
%F zhu-hauff-2022-unsupervised
%X Question generation (QG) approaches based on large neural models require (i) large-scale and (ii) high-quality training data. These two requirements pose difficulties for specific application domains where training data is expensive and difficult to obtain. The trained QG models’ effectiveness can degrade significantly when they are applied on a different domain due to domain shift. In this paper, we explore an unsupervised domain adaptation approach to combat the lack of training data and domain shift issue with domain data selection and self-training. We first present a novel answer-aware strategy for domain data selection to select data with the most similarity to a new domain. The selected data are then used as pseudo-in-domain data to retrain the QG model. We then present generation confidence guided self-training with two generation confidence modeling methods (i) generated questions’ perplexity and (ii) the fluency score. We test our approaches on three large public datasets with different domain similarities, using a transformer-based pre-trained QG model. The results show that our proposed approaches outperform the baselines, and show the viability of unsupervised domain adaptation with answer-aware data selection and self-training on the QG task.
%R 10.18653/v1/2022.findings-naacl.183
%U https://aclanthology.org/2022.findings-naacl.183
%U https://doi.org/10.18653/v1/2022.findings-naacl.183
%P 2388-2401
Markdown (Informal)
[Unsupervised Domain Adaptation for Question Generation with Domain Data Selection and Self-training](https://aclanthology.org/2022.findings-naacl.183) (Zhu & Hauff, Findings 2022)
ACL