@inproceedings{unlu-menevse-etal-2022-framework,
title = "A Framework for Automatic Generation of Spoken Question-Answering Data",
author = {{\"U}nl{\"u} Menev{\c{s}}e, Merve and
Manav, Yusufcan and
Arisoy, Ebru and
{\"O}zg{\"u}r, Arzucan},
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.342",
doi = "10.18653/v1/2022.findings-emnlp.342",
pages = "4659--4666",
abstract = "This paper describes a framework to automatically generate a spoken question answering (QA) dataset. The framework consists of a question generation (QG) module to generate questions automatically from given text documents, a text-to-speech (TTS) module to convert the text documents into spoken form and an automatic speech recognition (ASR) module to transcribe the spoken content. The final dataset contains question-answer pairs for both the reference text and ASR transcriptions as well as the audio files corresponding to each reference text. For QG and ASR systems we used pre-trained multilingual encoder-decoder transformer models and fine-tuned these models using a limited amount of manually generated QA data and TTS-based speech data, respectively. As a proof of concept, we investigated the proposed framework for Turkish and generated the Turkish Question Answering (TurQuAse) dataset using Wikipedia articles. Manual evaluation of the automatically generated question- answer pairs and QA performance evaluation with state of-the-art models on TurQuAse show that the proposed framework is efficient for automatically generating spoken QA datasets. To the best of our knowledge, TurQuAse is the first publicly available spoken question answering dataset for Turkish. The proposed framework can be easily extended to other languages where a limited amount of QA data is available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="unlu-menevse-etal-2022-framework">
<titleInfo>
<title>A Framework for Automatic Generation of Spoken Question-Answering Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Merve</namePart>
<namePart type="family">Ünlü Menevşe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusufcan</namePart>
<namePart type="family">Manav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ebru</namePart>
<namePart type="family">Arisoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arzucan</namePart>
<namePart type="family">Özgür</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a framework to automatically generate a spoken question answering (QA) dataset. The framework consists of a question generation (QG) module to generate questions automatically from given text documents, a text-to-speech (TTS) module to convert the text documents into spoken form and an automatic speech recognition (ASR) module to transcribe the spoken content. The final dataset contains question-answer pairs for both the reference text and ASR transcriptions as well as the audio files corresponding to each reference text. For QG and ASR systems we used pre-trained multilingual encoder-decoder transformer models and fine-tuned these models using a limited amount of manually generated QA data and TTS-based speech data, respectively. As a proof of concept, we investigated the proposed framework for Turkish and generated the Turkish Question Answering (TurQuAse) dataset using Wikipedia articles. Manual evaluation of the automatically generated question- answer pairs and QA performance evaluation with state of-the-art models on TurQuAse show that the proposed framework is efficient for automatically generating spoken QA datasets. To the best of our knowledge, TurQuAse is the first publicly available spoken question answering dataset for Turkish. The proposed framework can be easily extended to other languages where a limited amount of QA data is available.</abstract>
<identifier type="citekey">unlu-menevse-etal-2022-framework</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.342</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.342</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>4659</start>
<end>4666</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Framework for Automatic Generation of Spoken Question-Answering Data
%A Ünlü Menevşe, Merve
%A Manav, Yusufcan
%A Arisoy, Ebru
%A Özgür, Arzucan
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F unlu-menevse-etal-2022-framework
%X This paper describes a framework to automatically generate a spoken question answering (QA) dataset. The framework consists of a question generation (QG) module to generate questions automatically from given text documents, a text-to-speech (TTS) module to convert the text documents into spoken form and an automatic speech recognition (ASR) module to transcribe the spoken content. The final dataset contains question-answer pairs for both the reference text and ASR transcriptions as well as the audio files corresponding to each reference text. For QG and ASR systems we used pre-trained multilingual encoder-decoder transformer models and fine-tuned these models using a limited amount of manually generated QA data and TTS-based speech data, respectively. As a proof of concept, we investigated the proposed framework for Turkish and generated the Turkish Question Answering (TurQuAse) dataset using Wikipedia articles. Manual evaluation of the automatically generated question- answer pairs and QA performance evaluation with state of-the-art models on TurQuAse show that the proposed framework is efficient for automatically generating spoken QA datasets. To the best of our knowledge, TurQuAse is the first publicly available spoken question answering dataset for Turkish. The proposed framework can be easily extended to other languages where a limited amount of QA data is available.
%R 10.18653/v1/2022.findings-emnlp.342
%U https://aclanthology.org/2022.findings-emnlp.342
%U https://doi.org/10.18653/v1/2022.findings-emnlp.342
%P 4659-4666
Markdown (Informal)
[A Framework for Automatic Generation of Spoken Question-Answering Data](https://aclanthology.org/2022.findings-emnlp.342) (Ünlü Menevşe et al., Findings 2022)
ACL