@inproceedings{gonzalez-hernandez-etal-2024-end,
title = "End-to-End Relation Extraction of Pharmacokinetic Estimates from the Scientific Literature",
author = "Gonzalez Hernandez, Ferran and
Smith, Victoria and
Nguyen, Quang and
Chotsiri, Palang and
Wattanakul, Thanaporn and
Antonio Cordero, Jos{\'e} and
Ballester, Maria Rosa and
Sole, Albert and
Mundin, Gill and
Lilaonitkul, Watjana and
Standing, Joseph F. and
Kloprogge, Frank",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Miwa, Makoto and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "Proceedings of the 23rd Workshop on Biomedical Natural Language Processing",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.bionlp-1.12",
doi = "10.18653/v1/2024.bionlp-1.12",
pages = "144--154",
abstract = "The lack of comprehensive and standardised databases containing Pharmacokinetic (PK) parameters presents a challenge in the drug development pipeline. Efficiently managing the increasing volume of published PK Parameters requires automated approaches that centralise information from diverse studies. In this work, we present the Pharmacokinetic Relation Extraction Dataset (PRED), a novel, manually curated corpus developed by pharmacometricians and NLP specialists, covering multiple types of PK parameters and numerical expressions reported in open-access scientific articles. PRED covers annotations for various entities and relations involved in PK parameter measurements from 3,600 sentences. We also introduce an end-to-end relation extraction model based on BioBERT, which is trained with joint named entity recognition (NER) and relation extraction objectives. The optimal pipeline achieved a micro-average F1-score of 94{\%} for NER and over 85{\%} F1-score across all relation types. This work represents the first resource for training and evaluating models for PK end-to-end extraction across multiple parameters and study types. We make our corpus and model openly available to accelerate the construction of large PK databases and to support similar endeavours in other scientific disciplines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gonzalez-hernandez-etal-2024-end">
<titleInfo>
<title>End-to-End Relation Extraction of Pharmacokinetic Estimates from the Scientific Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ferran</namePart>
<namePart type="family">Gonzalez Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quang</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Palang</namePart>
<namePart type="family">Chotsiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thanaporn</namePart>
<namePart type="family">Wattanakul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="family">Antonio Cordero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Rosa</namePart>
<namePart type="family">Ballester</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="family">Sole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gill</namePart>
<namePart type="family">Mundin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Watjana</namePart>
<namePart type="family">Lilaonitkul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Standing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frank</namePart>
<namePart type="family">Kloprogge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Workshop on Biomedical Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Makoto</namePart>
<namePart type="family">Miwa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The lack of comprehensive and standardised databases containing Pharmacokinetic (PK) parameters presents a challenge in the drug development pipeline. Efficiently managing the increasing volume of published PK Parameters requires automated approaches that centralise information from diverse studies. In this work, we present the Pharmacokinetic Relation Extraction Dataset (PRED), a novel, manually curated corpus developed by pharmacometricians and NLP specialists, covering multiple types of PK parameters and numerical expressions reported in open-access scientific articles. PRED covers annotations for various entities and relations involved in PK parameter measurements from 3,600 sentences. We also introduce an end-to-end relation extraction model based on BioBERT, which is trained with joint named entity recognition (NER) and relation extraction objectives. The optimal pipeline achieved a micro-average F1-score of 94% for NER and over 85% F1-score across all relation types. This work represents the first resource for training and evaluating models for PK end-to-end extraction across multiple parameters and study types. We make our corpus and model openly available to accelerate the construction of large PK databases and to support similar endeavours in other scientific disciplines.</abstract>
<identifier type="citekey">gonzalez-hernandez-etal-2024-end</identifier>
<identifier type="doi">10.18653/v1/2024.bionlp-1.12</identifier>
<location>
<url>https://aclanthology.org/2024.bionlp-1.12</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>144</start>
<end>154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T End-to-End Relation Extraction of Pharmacokinetic Estimates from the Scientific Literature
%A Gonzalez Hernandez, Ferran
%A Smith, Victoria
%A Nguyen, Quang
%A Chotsiri, Palang
%A Wattanakul, Thanaporn
%A Antonio Cordero, José
%A Ballester, Maria Rosa
%A Sole, Albert
%A Mundin, Gill
%A Lilaonitkul, Watjana
%A Standing, Joseph F.
%A Kloprogge, Frank
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Miwa, Makoto
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S Proceedings of the 23rd Workshop on Biomedical Natural Language Processing
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F gonzalez-hernandez-etal-2024-end
%X The lack of comprehensive and standardised databases containing Pharmacokinetic (PK) parameters presents a challenge in the drug development pipeline. Efficiently managing the increasing volume of published PK Parameters requires automated approaches that centralise information from diverse studies. In this work, we present the Pharmacokinetic Relation Extraction Dataset (PRED), a novel, manually curated corpus developed by pharmacometricians and NLP specialists, covering multiple types of PK parameters and numerical expressions reported in open-access scientific articles. PRED covers annotations for various entities and relations involved in PK parameter measurements from 3,600 sentences. We also introduce an end-to-end relation extraction model based on BioBERT, which is trained with joint named entity recognition (NER) and relation extraction objectives. The optimal pipeline achieved a micro-average F1-score of 94% for NER and over 85% F1-score across all relation types. This work represents the first resource for training and evaluating models for PK end-to-end extraction across multiple parameters and study types. We make our corpus and model openly available to accelerate the construction of large PK databases and to support similar endeavours in other scientific disciplines.
%R 10.18653/v1/2024.bionlp-1.12
%U https://aclanthology.org/2024.bionlp-1.12
%U https://doi.org/10.18653/v1/2024.bionlp-1.12
%P 144-154
Markdown (Informal)
[End-to-End Relation Extraction of Pharmacokinetic Estimates from the Scientific Literature](https://aclanthology.org/2024.bionlp-1.12) (Gonzalez Hernandez et al., BioNLP-WS 2024)
ACL
- Ferran Gonzalez Hernandez, Victoria Smith, Quang Nguyen, Palang Chotsiri, Thanaporn Wattanakul, José Antonio Cordero, Maria Rosa Ballester, Albert Sole, Gill Mundin, Watjana Lilaonitkul, Joseph F. Standing, and Frank Kloprogge. 2024. End-to-End Relation Extraction of Pharmacokinetic Estimates from the Scientific Literature. In Proceedings of the 23rd Workshop on Biomedical Natural Language Processing, pages 144–154, Bangkok, Thailand. Association for Computational Linguistics.