% Workshop paper in the HeaLing 2026 proceedings (ACL Anthology record 2026.healing-1.10).
% Author names carry no honorifics so that First/von/Last parsing yields correct initials.
@inproceedings{sharoff-etal-2026-almost,
  title     = {Almost Clinical: Linguistic properties of synthetic electronic health records},
  author    = {Sharoff, Serge and
               Baker, John and
               Hunt, David Francis and
               Simpson, Alan},
  editor    = {Danilova, Vera and
               Kurfal{\i}, Murathan and
               S{\"o}derfeldt, Ylva and
               Reed, Julia and
               Burchell, Andrew},
  booktitle = {Proceedings of the 1st Workshop on Linguistic Analysis for Health ({HeaLing} 2026)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.healing-1.10/},
  pages     = {115--126},
  isbn      = {979-8-89176-367-8},
  abstract  = {This study evaluates the linguistic and clinical suitability of synthetic electronic health records in mental health. First, we describe the rationale and the methodology for creating the synthetic corpus. Second, we examine expressions of agency, modality, and information flow across four clinical genres (Assessments, Correspondence, Referrals and Care plans) with the aim to understand how LLMs grammatically construct medical authority and patient agency through linguistic choices. While LLMs produce coherent, terminology-appropriate texts that approximate clinical practice, systematic divergences remain, including registerial shifts, insufficient clinical specificity, and inaccuracies in medication use and diagnostic procedures. The results show both the potential and limitations of synthetic corpora for enabling large-scale linguistic research otherwise impossible with genuine patient records.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sharoff-etal-2026-almost">
<titleInfo>
<title>Almost Clinical: Linguistic properties of synthetic electronic health records</title>
</titleInfo>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Baker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="termsOfAddress">Dr</namePart>
<namePart type="given">David</namePart>
<namePart type="given">Francis</namePart>
<namePart type="family">Hunt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Simpson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Danilova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfalı</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ylva</namePart>
<namePart type="family">Söderfeldt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Reed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Burchell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-367-8</identifier>
</relatedItem>
<abstract>This study evaluates the linguistic and clinical suitability of synthetic electronic health records in mental health. First, we describe the rationale and the methodology for creating the synthetic corpus. Second, we examine expressions of agency, modality, and information flow across four clinical genres (Assessments, Correspondence, Referrals and Care plans) with the aim to understand how LLMs grammatically construct medical authority and patient agency through linguistic choices. While LLMs produce coherent, terminology-appropriate texts that approximate clinical practice, systematic divergences remain, including registerial shifts, insufficient clinical specificity, and inaccuracies in medication use and diagnostic procedures. The results show both the potential and limitations of synthetic corpora for enabling large-scale linguistic research otherwise impossible with genuine patient records.</abstract>
<identifier type="citekey">sharoff-etal-2026-almost</identifier>
<location>
<url>https://aclanthology.org/2026.healing-1.10/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>115</start>
<end>126</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Almost Clinical: Linguistic properties of synthetic electronic health records
%A Sharoff, Serge
%A Baker, John
%A Hunt, David Francis
%A Simpson, Alan
%Y Danilova, Vera
%Y Kurfalı, Murathan
%Y Söderfeldt, Ylva
%Y Reed, Julia
%Y Burchell, Andrew
%S Proceedings of the 1st Workshop on Linguistic Analysis for Health (HeaLing 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-367-8
%F sharoff-etal-2026-almost
%X This study evaluates the linguistic and clinical suitability of synthetic electronic health records in mental health. First, we describe the rationale and the methodology for creating the synthetic corpus. Second, we examine expressions of agency, modality, and information flow across four clinical genres (Assessments, Correspondence, Referrals and Care plans) with the aim to understand how LLMs grammatically construct medical authority and patient agency through linguistic choices. While LLMs produce coherent, terminology-appropriate texts that approximate clinical practice, systematic divergences remain, including registerial shifts, insufficient clinical specificity, and inaccuracies in medication use and diagnostic procedures. The results show both the potential and limitations of synthetic corpora for enabling large-scale linguistic research otherwise impossible with genuine patient records.
%U https://aclanthology.org/2026.healing-1.10/
%P 115-126
Markdown (Informal)
[Almost Clinical: Linguistic properties of synthetic electronic health records](https://aclanthology.org/2026.healing-1.10/) (Sharoff et al., HeaLing 2026)
ACL