@inproceedings{chetwani-mahmmdla-2025-biopsy,
title = "{BIOPSY} - Biomarkers In Oncology: Pipeline for Structured Yielding",
author = "Chetwani, Sanya A. and
Mahmmdla, Jaseem",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.159/",
pages = "2313--2321",
ISBN = "979-8-89176-333-3",
abstract = "In clinical science, biomarkers are crucial indicators for early cancer detection, prognosis, and guiding personalized treatment decisions. Although critical, extracting biomarkers and their levels from clinical texts remains a complex and underexplored problem in natural language processing research. In this paper, we present BIOPSY, an end-to-end pipeline that integrates a domain-adapted biomarker entity recognition model, a relation extraction model to link biomarkers to their respective mutations, a biomarker-type classifier, and finally, a tailored algorithm to capture biomarker expression levels. Evaluated on 5,000 real-world clinical texts, our system achieved an overall F1 score of 0.86 for oncology and 0.87 for neuroscience domains. This reveals the ability of the pipeline to adapt across various clinical sources, including trial records, research papers, and medical notes, offering the first comprehensive solution for end-to-end, context-aware biomarker extraction and interpretation in clinical research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chetwani-mahmmdla-2025-biopsy">
<titleInfo>
<title>BIOPSY - Biomarkers In Oncology: Pipeline for Structured Yielding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanya</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Chetwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaseem</namePart>
<namePart type="family">Mahmmdla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>In clinical science, biomarkers are crucial indicators for early cancer detection, prognosis, and guiding personalized treatment decisions. Although critical, extracting biomarkers and their levels from clinical texts remains a complex and underexplored problem in natural language processing research. In this paper, we present BIOPSY, an end-to-end pipeline that integrates a domain-adapted biomarker entity recognition model, a relation extraction model to link biomarkers to their respective mutations, a biomarker-type classifier, and finally, a tailored algorithm to capture biomarker expression levels. Evaluated on 5,000 real-world clinical texts, our system achieved an overall F1 score of 0.86 for oncology and 0.87 for neuroscience domains. This reveals the ability of the pipeline to adapt across various clinical sources, including trial records, research papers, and medical notes, offering the first comprehensive solution for end-to-end, context-aware biomarker extraction and interpretation in clinical research.</abstract>
<identifier type="citekey">chetwani-mahmmdla-2025-biopsy</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.159/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>2313</start>
<end>2321</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T BIOPSY - Biomarkers In Oncology: Pipeline for Structured Yielding
%A Chetwani, Sanya A.
%A Mahmmdla, Jaseem
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F chetwani-mahmmdla-2025-biopsy
%X In clinical science, biomarkers are crucial indicators for early cancer detection, prognosis, and guiding personalized treatment decisions. Although critical, extracting biomarkers and their levels from clinical texts remains a complex and underexplored problem in natural language processing research. In this paper, we present BIOPSY, an end-to-end pipeline that integrates a domain-adapted biomarker entity recognition model, a relation extraction model to link biomarkers to their respective mutations, a biomarker-type classifier, and finally, a tailored algorithm to capture biomarker expression levels. Evaluated on 5,000 real-world clinical texts, our system achieved an overall F1 score of 0.86 for oncology and 0.87 for neuroscience domains. This reveals the ability of the pipeline to adapt across various clinical sources, including trial records, research papers, and medical notes, offering the first comprehensive solution for end-to-end, context-aware biomarker extraction and interpretation in clinical research.
%U https://aclanthology.org/2025.emnlp-industry.159/
%P 2313-2321
Markdown (Informal)
[BIOPSY - Biomarkers In Oncology: Pipeline for Structured Yielding](https://aclanthology.org/2025.emnlp-industry.159/) (Chetwani & Mahmmdla, EMNLP 2025)
ACL