@inproceedings{luo-etal-2022-chinese,
title = "{C}hinese Movie Dialogue Question Answering Dataset",
author = "Luo, Shang-Bao and
Fan, Cheng-Chung and
Chen, Kuan-Yu and
Tsao, Yu and
Wang, Hsin-Min and
Su, Keh-Yih",
booktitle = "Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)",
month = nov,
year = "2022",
address = "Taipei, Taiwan",
publisher = "The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)",
url = "https://aclanthology.org/2022.rocling-1.2",
pages = "7--14",
abstract = "This paper constructs a Chinese dialogue-based information-seeking question answering dataset CMDQA, which is mainly applied to the scenario of getting Chinese movie related information. It contains 10K QA dialogs (40K turns in total). All questions and background documents are compiled from the Wikipedia via an Internet crawler. The answers to the questions are obtained via extracting the corresponding answer spans within the related text passage. In CMDQA, in addition to searching related documents, pronouns are also added to the question to better mimic the real dialog scenario. This dataset can test the individual performance of the information retrieval, the question answering and the question re-writing modules. This paper also provides a baseline system and shows its performance on this dataset. The experiments elucidate that it still has a big gap to catch the human performance. This dataset thus provides enough challenge for the researcher to conduct related research.",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luo-etal-2022-chinese">
<titleInfo>
<title>Chinese Movie Dialogue Question Answering Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shang-Bao</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng-Chung</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kuan-Yu</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Tsao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Min</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keh-Yih</namePart>
<namePart type="family">Su</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)</title>
</titleInfo>
<originInfo>
<publisher>The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)</publisher>
<place>
<placeTerm type="text">Taipei, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper constructs a Chinese dialogue-based information-seeking question answering dataset CMDQA, which is mainly applied to the scenario of getting Chinese movie related information. It contains 10K QA dialogs (40K turns in total). All questions and background documents are compiled from the Wikipedia via an Internet crawler. The answers to the questions are obtained via extracting the corresponding answer spans within the related text passage. In CMDQA, in addition to searching related documents, pronouns are also added to the question to better mimic the real dialog scenario. This dataset can test the individual performance of the information retrieval, the question answering and the question re-writing modules. This paper also provides a baseline system and shows its performance on this dataset. The experiments elucidate that it still has a big gap to catch the human performance. This dataset thus provides enough challenge for the researcher to conduct related research.</abstract>
<identifier type="citekey">luo-etal-2022-chinese</identifier>
<location>
<url>https://aclanthology.org/2022.rocling-1.2</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>7</start>
<end>14</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Chinese Movie Dialogue Question Answering Dataset
%A Luo, Shang-Bao
%A Fan, Cheng-Chung
%A Chen, Kuan-Yu
%A Tsao, Yu
%A Wang, Hsin-Min
%A Su, Keh-Yih
%S Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)
%D 2022
%8 November
%I The Association for Computational Linguistics and Chinese Language Processing (ACLCLP)
%C Taipei, Taiwan
%G Chinese
%F luo-etal-2022-chinese
%X This paper constructs a Chinese dialogue-based information-seeking question answering dataset CMDQA, which is mainly applied to the scenario of getting Chinese movie related information. It contains 10K QA dialogs (40K turns in total). All questions and background documents are compiled from the Wikipedia via an Internet crawler. The answers to the questions are obtained via extracting the corresponding answer spans within the related text passage. In CMDQA, in addition to searching related documents, pronouns are also added to the question to better mimic the real dialog scenario. This dataset can test the individual performance of the information retrieval, the question answering and the question re-writing modules. This paper also provides a baseline system and shows its performance on this dataset. The experiments elucidate that it still has a big gap to catch the human performance. This dataset thus provides enough challenge for the researcher to conduct related research.
%U https://aclanthology.org/2022.rocling-1.2
%P 7-14
Markdown (Informal)
[Chinese Movie Dialogue Question Answering Dataset](https://aclanthology.org/2022.rocling-1.2) (Luo et al., ROCLING 2022)
ACL
- Shang-Bao Luo, Cheng-Chung Fan, Kuan-Yu Chen, Yu Tsao, Hsin-Min Wang, and Keh-Yih Su. 2022. Chinese Movie Dialogue Question Answering Dataset. In Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022), pages 7–14, Taipei, Taiwan. The Association for Computational Linguistics and Chinese Language Processing (ACLCLP).