@inproceedings{sinha-etal-2026-medical,
title = "Medical Context Variation: A source of impairment for Event classification",
author = "Sinha, Aman and
Clausel, Marianne and
Constant, Mathieu and
Coubez, Xavier",
editor = "Demner-Fushman, Dina and
Ananiadou, Sophia and
Roberts, Kirk and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2026",
month = jul,
year = "2026",
address = "San Diego, California",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.bionlp-1.70/",
pages = "864--879",
ISBN = "979-8-89176-434-7",
abstract = "The variation in writing style encapsulates nuanced characteristics, which are often exploited for author or demographic identification. In the medical domain, language models are frequently deployed to capture relevant information from unstructured or complex data, such as clinical notes that often include patients' medical histories. Such data is largely free-form and unstructured, obtained through diverse clinician--patient interactions. In this work, we present a case study investigating whether variations in clinicians' writing styles can lead to differences in medical context understanding capabilities for pre-trained language models (PLMs) on downstream tasks, such as medical event classification. Our findings indicate that variation in writing style, characterized by linguistic features, can indeed lead to suboptimal performance in deployed systems. Furthermore, we explore linguistic guided counterfactual reasoning in order to mitigate the impact of writing style variation which suggests LLM-based stylistic normalization to be effective for this purpose."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sinha-etal-2026-medical">
<titleInfo>
<title>Medical Context Variation: A source of impairment for Event classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aman</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianne</namePart>
<namePart type="family">Clausel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Coubez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kirk</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-434-7</identifier>
</relatedItem>
<abstract>The variation in writing style encapsulates nuanced characteristics, which are often exploited for author or demographic identification. In the medical domain, language models are frequently deployed to capture relevant information from unstructured or complex data, such as clinical notes that often include patients’ medical histories. Such data is largely free-form and unstructured, obtained through diverse clinician–patient interactions. In this work, we present a case study investigating whether variations in clinicians’ writing styles can lead to differences in medical context understanding capabilities for pre-trained language models (PLMs) on downstream tasks, such as medical event classification. Our findings indicate that variation in writing style, characterized by linguistic features, can indeed lead to suboptimal performance in deployed systems. Furthermore, we explore linguistic guided counterfactual reasoning in order to mitigate the impact of writing style variation which suggests LLM-based stylistic normalization to be effective for this purpose.</abstract>
<identifier type="citekey">sinha-etal-2026-medical</identifier>
<location>
<url>https://aclanthology.org/2026.bionlp-1.70/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>864</start>
<end>879</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Medical Context Variation: A source of impairment for Event classification
%A Sinha, Aman
%A Clausel, Marianne
%A Constant, Mathieu
%A Coubez, Xavier
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Roberts, Kirk
%Y Tsujii, Junichi
%S BioNLP 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California
%@ 979-8-89176-434-7
%F sinha-etal-2026-medical
%X The variation in writing style encapsulates nuanced characteristics, which are often exploited for author or demographic identification. In the medical domain, language models are frequently deployed to capture relevant information from unstructured or complex data, such as clinical notes that often include patients’ medical histories. Such data is largely free-form and unstructured, obtained through diverse clinician–patient interactions. In this work, we present a case study investigating whether variations in clinicians’ writing styles can lead to differences in medical context understanding capabilities for pre-trained language models (PLMs) on downstream tasks, such as medical event classification. Our findings indicate that variation in writing style, characterized by linguistic features, can indeed lead to suboptimal performance in deployed systems. Furthermore, we explore linguistic guided counterfactual reasoning in order to mitigate the impact of writing style variation which suggests LLM-based stylistic normalization to be effective for this purpose.
%U https://aclanthology.org/2026.bionlp-1.70/
%P 864-879
Markdown (Informal)
[Medical Context Variation: A source of impairment for Event classification](https://aclanthology.org/2026.bionlp-1.70/) (Sinha et al., BioNLP 2026)
ACL