% ACL Anthology record for the ConvRGX paper (Third DialDoc Workshop, 2023).
% Author and editor names are stored as "Family, Given" and joined with " and ".
@inproceedings{zhang-etal-2023-convrgx,
    title = "{ConvRGX}: Recognition, Generation, and Extraction for Self-trained Conversational Question Answering",
    author = "Zhang, Tianhua and
      Tang, Liping and
      Fang, Wei and
      Luo, Hongyin and
      Wu, Xixin and
      Meng, Helen and
      Glass, James",
    editor = "Muresan, Smaranda and
      Chen, Vivian and
      Kennington, Casey and
      Vandyke, David and
      Dethlefs, Nina and
      Inoue, Koji and
      Ekstedt, Erik and
      Ultes, Stefan",
    booktitle = "Proceedings of the Third DialDoc Workshop on Document-grounded Dialogue and Conversational Question Answering",
    month = jul,
    year = "2023",
    address = "Toronto, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.dialdoc-1.10/",
    doi = "10.18653/v1/2023.dialdoc-1.10",
    pages = "86--100",
    abstract = "Collecting and constructing human-annotated corpora for training conversational question-answering (CQA) models has recently been shown to be inefficient and costly. To solve this problem, previous works have proposed training QA models with automatically generated QA data. In this work, we extend earlier studies on QA synthesis, and propose an efficient QA data generation algorithm under conversational settings. Our model recognizes potential dialogue topics, generates corresponding questions, and extracts answers from grounding passages. To improve the quality of generated QAs and downstream self-training of CQA models, we propose dropout and agreement-based QA selection methods. We conduct experiments on both data augmentation and domain adaptation settings. Experiments on the QuAC and Doc2Dial tasks show that the proposed method can significantly improve the quality of generated QA data, and also improves the accuracy of self-trained CQA models based on the constructed training corpora."
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for citekey zhang-etal-2023-convrgx.
     Top-level <name> elements are the paper's authors; the <name> elements
     inside <relatedItem type="host"> are the editors of the host proceedings.
     Each personal name is split into a "given" and a "family" namePart. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="zhang-etal-2023-convrgx">
    <titleInfo>
      <title>ConvRGX: Recognition, Generation, and Extraction for Self-trained Conversational Question Answering</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tianhua</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Liping</namePart>
      <namePart type="family">Tang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wei</namePart>
      <namePart type="family">Fang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongyin</namePart>
      <namePart type="family">Luo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xixin</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Helen</namePart>
      <namePart type="family">Meng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">James</namePart>
      <namePart type="family">Glass</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third DialDoc Workshop on Document-grounded Dialogue and Conversational Question Answering</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Smaranda</namePart>
        <namePart type="family">Muresan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vivian</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Casey</namePart>
        <namePart type="family">Kennington</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Vandyke</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nina</namePart>
        <namePart type="family">Dethlefs</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Koji</namePart>
        <namePart type="family">Inoue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Erik</namePart>
        <namePart type="family">Ekstedt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefan</namePart>
        <namePart type="family">Ultes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Toronto, Canada</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Collecting and constructing human-annotated corpora for training conversational question-answering (CQA) models has recently been shown to be inefficient and costly. To solve this problem, previous works have proposed training QA models with automatically generated QA data. In this work, we extend earlier studies on QA synthesis, and propose an efficient QA data generation algorithm under conversational settings. Our model recognizes potential dialogue topics, generates corresponding questions, and extracts answers from grounding passages. To improve the quality of generated QAs and downstream self-training of CQA models, we propose dropout and agreement-based QA selection methods. We conduct experiments on both data augmentation and domain adaptation settings. Experiments on the QuAC and Doc2Dial tasks show that the proposed method can significantly improve the quality of generated QA data, and also improves the accuracy of self-trained CQA models based on the constructed training corpora.</abstract>
    <identifier type="citekey">zhang-etal-2023-convrgx</identifier>
    <identifier type="doi">10.18653/v1/2023.dialdoc-1.10</identifier>
    <location>
      <url>https://aclanthology.org/2023.dialdoc-1.10/</url>
    </location>
    <part>
      <date>2023-07</date>
      <extent unit="page">
        <start>86</start>
        <end>100</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ConvRGX: Recognition, Generation, and Extraction for Self-trained Conversational Question Answering
%A Zhang, Tianhua
%A Tang, Liping
%A Fang, Wei
%A Luo, Hongyin
%A Wu, Xixin
%A Meng, Helen
%A Glass, James
%Y Muresan, Smaranda
%Y Chen, Vivian
%Y Kennington, Casey
%Y Vandyke, David
%Y Dethlefs, Nina
%Y Inoue, Koji
%Y Ekstedt, Erik
%Y Ultes, Stefan
%S Proceedings of the Third DialDoc Workshop on Document-grounded Dialogue and Conversational Question Answering
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F zhang-etal-2023-convrgx
%X Collecting and constructing human-annotated corpora for training conversational question-answering (CQA) models has recently been shown to be inefficient and costly. To solve this problem, previous works have proposed training QA models with automatically generated QA data. In this work, we extend earlier studies on QA synthesis, and propose an efficient QA data generation algorithm under conversational settings. Our model recognizes potential dialogue topics, generates corresponding questions, and extracts answers from grounding passages. To improve the quality of generated QAs and downstream self-training of CQA models, we propose dropout and agreement-based QA selection methods. We conduct experiments on both data augmentation and domain adaptation settings. Experiments on the QuAC and Doc2Dial tasks show that the proposed method can significantly improve the quality of generated QA data, and also improves the accuracy of self-trained CQA models based on the constructed training corpora.
%R 10.18653/v1/2023.dialdoc-1.10
%U https://aclanthology.org/2023.dialdoc-1.10/
%U https://doi.org/10.18653/v1/2023.dialdoc-1.10
%P 86-100
Markdown (Informal)
[ConvRGX: Recognition, Generation, and Extraction for Self-trained Conversational Question Answering](https://aclanthology.org/2023.dialdoc-1.10/) (Zhang et al., dialdoc 2023)
ACL
- Tianhua Zhang, Liping Tang, Wei Fang, Hongyin Luo, Xixin Wu, Helen Meng, and James Glass. 2023. ConvRGX: Recognition, Generation, and Extraction for Self-trained Conversational Question Answering. In Proceedings of the Third DialDoc Workshop on Document-grounded Dialogue and Conversational Question Answering, pages 86–100, Toronto, Canada. Association for Computational Linguistics.