@inproceedings{hanyu-etal-2021-chinese,
title = "A {C}hinese Machine Reading Comprehension Dataset Automatic Generated Based on Knowledge Graph",
author = "Hanyu, Zhao and
Sha, Yuan and
Jiahong, Leng and
Xiang, Pan and
Zhao, Xue and
Quanyue, Ma and
Yangxiao, Liang",
editor = "Li, Sheng and
Sun, Maosong and
Liu, Yang and
Wu, Hua and
Liu, Kang and
Che, Wanxiang and
He, Shizhu and
Rao, Gaoqi",
booktitle = "Proceedings of the 20th Chinese National Conference on Computational Linguistics",
month = aug,
year = "2021",
address = "Huhhot, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2021.ccl-1.95",
pages = "1066--1075",
abstract = "Machine reading comprehension (MRC) is a typical natural language processing (NLP) task and has developed rapidly in the last few years. Various reading comprehension datasets have been built to support MRC studies. However, large-scale and high-quality datasets are rare due to the high complexity and huge workforce cost of making such a dataset. Besides, most reading comprehension datasets are in English, and Chinese datasets are insufficient. In this paper, we propose an automatic method for MRC dataset generation and build the largest Chinese medical reading comprehension dataset presently, named CMedRC. Our dataset contains 17k questions generated by our automatic method and some seed questions. We obtain the corresponding answers from a medical knowledge graph and manually check all of them. Finally, we test BiLSTM and BERT-based pre-trained language models (PLMs) on our dataset and propose a baseline for the following studies. Results show that the automatic MRC dataset generation method is considerable for future model improvements.",
language = "English",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hanyu-etal-2021-chinese">
<titleInfo>
<title>A Chinese Machine Reading Comprehension Dataset Automatic Generated Based on Knowledge Graph</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhao</namePart>
<namePart type="family">Hanyu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Sha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leng</namePart>
<namePart type="family">Jiahong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pan</namePart>
<namePart type="family">Xiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xue</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ma</namePart>
<namePart type="family">Quanyue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Yangxiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shizhu</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaoqi</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Huhhot, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine reading comprehension (MRC) is a typical natural language processing (NLP) task and has developed rapidly in the last few years. Various reading comprehension datasets have been built to support MRC studies. However, large-scale and high-quality datasets are rare due to the high complexity and huge workforce cost of making such a dataset. Besides, most reading comprehension datasets are in English, and Chinese datasets are insufficient. In this paper, we propose an automatic method for MRC dataset generation and build the largest Chinese medical reading comprehension dataset presently, named CMedRC. Our dataset contains 17k questions generated by our automatic method and some seed questions. We obtain the corresponding answers from a medical knowledge graph and manually check all of them. Finally, we test BiLSTM and BERT-based pre-trained language models (PLMs) on our dataset and propose a baseline for the following studies. Results show that the automatic MRC dataset generation method is considerable for future model improvements.</abstract>
<identifier type="citekey">hanyu-etal-2021-chinese</identifier>
<location>
<url>https://aclanthology.org/2021.ccl-1.95</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>1066</start>
<end>1075</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Chinese Machine Reading Comprehension Dataset Automatic Generated Based on Knowledge Graph
%A Hanyu, Zhao
%A Sha, Yuan
%A Jiahong, Leng
%A Xiang, Pan
%A Zhao, Xue
%A Quanyue, Ma
%A Yangxiao, Liang
%Y Li, Sheng
%Y Sun, Maosong
%Y Liu, Yang
%Y Wu, Hua
%Y Liu, Kang
%Y Che, Wanxiang
%Y He, Shizhu
%Y Rao, Gaoqi
%S Proceedings of the 20th Chinese National Conference on Computational Linguistics
%D 2021
%8 August
%I Chinese Information Processing Society of China
%C Huhhot, China
%G English
%F hanyu-etal-2021-chinese
%X Machine reading comprehension (MRC) is a typical natural language processing (NLP) task and has developed rapidly in the last few years. Various reading comprehension datasets have been built to support MRC studies. However, large-scale and high-quality datasets are rare due to the high complexity and huge workforce cost of making such a dataset. Besides, most reading comprehension datasets are in English, and Chinese datasets are insufficient. In this paper, we propose an automatic method for MRC dataset generation and build the largest Chinese medical reading comprehension dataset presently, named CMedRC. Our dataset contains 17k questions generated by our automatic method and some seed questions. We obtain the corresponding answers from a medical knowledge graph and manually check all of them. Finally, we test BiLSTM and BERT-based pre-trained language models (PLMs) on our dataset and propose a baseline for the following studies. Results show that the automatic MRC dataset generation method is considerable for future model improvements.
%U https://aclanthology.org/2021.ccl-1.95
%P 1066-1075
Markdown (Informal)
[A Chinese Machine Reading Comprehension Dataset Automatic Generated Based on Knowledge Graph](https://aclanthology.org/2021.ccl-1.95) (Hanyu et al., CCL 2021)
ACL