@inproceedings{mutinda-etal-2022-pico,
title = "{PICO} Corpus: A Publicly Available Corpus to Support Automatic Data Extraction from Biomedical Literature",
author = "Mutinda, Faith and
Liew, Kongmeng and
Yada, Shuntaro and
Wakamiya, Shoko and
Aramaki, Eiji",
editor = "Ghosal, Tirthankar and
Blanco-Cuaresma, Sergi and
Accomazzi, Alberto and
Patton, Robert M. and
Grezes, Felix and
Allen, Thomas",
booktitle = "Proceedings of the first Workshop on Information Extraction from Scientific Publications",
month = nov,
year = "2022",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wiesp-1.4",
doi = "10.18653/v1/2022.wiesp-1.4",
pages = "26--31",
abstract = "We present a publicly available corpus with detailed annotations describing the core elements of clinical trials: Participants, Intervention, Control, and Outcomes. The corpus consists of 1011 abstracts of breast cancer randomized controlled trials extracted from the PubMed database. The corpus improves previous corpora by providing detailed annotations for outcomes to identify numeric texts that report the number of participants that experience specific outcomes. The corpus will be helpful for the development of systems for automatic extraction of data from randomized controlled trial literature to support evidence-based medicine. Additionally, we demonstrate the feasibility of the corpus by using two strong baselines for named entity recognition task. Most of the entities achieve F1 scores greater than 0.80 demonstrating the quality of the dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mutinda-etal-2022-pico">
<titleInfo>
<title>PICO Corpus: A Publicly Available Corpus to Support Automatic Data Extraction from Biomedical Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Faith</namePart>
<namePart type="family">Mutinda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kongmeng</namePart>
<namePart type="family">Liew</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuntaro</namePart>
<namePart type="family">Yada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shoko</namePart>
<namePart type="family">Wakamiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiji</namePart>
<namePart type="family">Aramaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first Workshop on Information Extraction from Scientific Publications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergi</namePart>
<namePart type="family">Blanco-Cuaresma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Accomazzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Patton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Grezes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Allen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a publicly available corpus with detailed annotations describing the core elements of clinical trials: Participants, Intervention, Control, and Outcomes. The corpus consists of 1011 abstracts of breast cancer randomized controlled trials extracted from the PubMed database. The corpus improves previous corpora by providing detailed annotations for outcomes to identify numeric texts that report the number of participants that experience specific outcomes. The corpus will be helpful for the development of systems for automatic extraction of data from randomized controlled trial literature to support evidence-based medicine. Additionally, we demonstrate the feasibility of the corpus by using two strong baselines for named entity recognition task. Most of the entities achieve F1 scores greater than 0.80 demonstrating the quality of the dataset.</abstract>
<identifier type="citekey">mutinda-etal-2022-pico</identifier>
<identifier type="doi">10.18653/v1/2022.wiesp-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.wiesp-1.4</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>26</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PICO Corpus: A Publicly Available Corpus to Support Automatic Data Extraction from Biomedical Literature
%A Mutinda, Faith
%A Liew, Kongmeng
%A Yada, Shuntaro
%A Wakamiya, Shoko
%A Aramaki, Eiji
%Y Ghosal, Tirthankar
%Y Blanco-Cuaresma, Sergi
%Y Accomazzi, Alberto
%Y Patton, Robert M.
%Y Grezes, Felix
%Y Allen, Thomas
%S Proceedings of the first Workshop on Information Extraction from Scientific Publications
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online
%F mutinda-etal-2022-pico
%X We present a publicly available corpus with detailed annotations describing the core elements of clinical trials: Participants, Intervention, Control, and Outcomes. The corpus consists of 1011 abstracts of breast cancer randomized controlled trials extracted from the PubMed database. The corpus improves previous corpora by providing detailed annotations for outcomes to identify numeric texts that report the number of participants that experience specific outcomes. The corpus will be helpful for the development of systems for automatic extraction of data from randomized controlled trial literature to support evidence-based medicine. Additionally, we demonstrate the feasibility of the corpus by using two strong baselines for named entity recognition task. Most of the entities achieve F1 scores greater than 0.80 demonstrating the quality of the dataset.
%R 10.18653/v1/2022.wiesp-1.4
%U https://aclanthology.org/2022.wiesp-1.4
%U https://doi.org/10.18653/v1/2022.wiesp-1.4
%P 26-31
Markdown (Informal)
[PICO Corpus: A Publicly Available Corpus to Support Automatic Data Extraction from Biomedical Literature](https://aclanthology.org/2022.wiesp-1.4) (Mutinda et al., WIESP 2022)
ACL