% ACL Anthology BibTeX record for a LaTeCH-CLfL 2026 workshop paper.
% Acronyms that must keep their case under sentence-casing styles are brace-protected.
@inproceedings{schacht-delucchi-danhier-2026-studying-expert,
  title     = {Studying Expert-ese: Profiling and Classification of Domain-Specific Language Variation in Architecture with Traditional Machine Learning and {LLM}s},
  author    = {Schacht, Carmen and
               Delucchi Danhier, Renate},
  editor    = {Alves, Diego and
               Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Pagel, Janis and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 10th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.latechclfl-1.3/},
  pages     = {16--29},
  isbn      = {979-8-89176-373-9},
  abstract  = {This study investigates how domain expertise shapes spontaneous oral language production, with a focus on architecture. Building on the ExpLay Corpus, which contains image descriptions by speakers with and without architectural training, we analyze linguistic variation by combining Profiling-UD and the DECAF framework. We extract a broad range of syntactic and morpho-syntactic features to build linguistic profiles for both groups and train classifiers to distinguish expert from non-expert productions. Two traditional machine learning models (logistic regression and SVM) are compared with a lightweight BiLSTM and two large language models (GliClass and LLaMA 2). While the expert and non-expert corpora diverge only subtly (pairwise Jensen{--}Shannon divergence (JSD)= 0.25), the BiLSTM using fastText embeddings achieves the highest F1-score (0.88), outperforming both traditional models and LLMs. This indicates that semantic representations are more predictive of domain variation than purely structural features and that smaller neural architectures generalize better on limited data. Overall, the findings provide empirical evidence that architectural expertise leaves measurable linguistic traces in spontaneous speech, supporting the Grammar of Space hypothesis.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record describing the paper; its ID matches the citekey identifier below. -->
  <mods ID="schacht-delucchi-danhier-2026-studying-expert">
    <titleInfo>
      <title>Studying Expert-ese: Profiling and Classification of Domain-Specific Language Variation in Architecture with Traditional Machine Learning and LLMs</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Carmen</namePart>
      <namePart type="family">Schacht</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Renate</namePart>
      <namePart type="family">Delucchi Danhier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Diego</namePart>
        <namePart type="family">Alves</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefania</namePart>
        <namePart type="family">Degaetano-Ortlieb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Kazantseva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Janis</namePart>
        <namePart type="family">Pagel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stan</namePart>
        <namePart type="family">Szpakowicz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-373-9</identifier>
    </relatedItem>
    <abstract>This study investigates how domain expertise shapes spontaneous oral language production, with a focus on architecture. Building on the ExpLay Corpus, which contains image descriptions by speakers with and without architectural training, we analyze linguistic variation by combining Profiling-UD and the DECAF framework. We extract a broad range of syntactic and morpho-syntactic features to build linguistic profiles for both groups and train classifiers to distinguish expert from non-expert productions. Two traditional machine learning models (logistic regression and SVM) are compared with a lightweight BiLSTM and two large language models (GliClass and LLaMA 2). While the expert and non-expert corpora diverge only subtly (pairwise Jensen–Shannon divergence (JSD)= 0.25), the BiLSTM using fastText embeddings achieves the highest F1-score (0.88), outperforming both traditional models and LLMs. This indicates that semantic representations are more predictive of domain variation than purely structural features and that smaller neural architectures generalize better on limited data. Overall, the findings provide empirical evidence that architectural expertise leaves measurable linguistic traces in spontaneous speech, supporting the Grammar of Space hypothesis.</abstract>
    <identifier type="citekey">schacht-delucchi-danhier-2026-studying-expert</identifier>
    <location>
      <url>https://aclanthology.org/2026.latechclfl-1.3/</url>
    </location>
    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>16</start>
        <end>29</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Studying Expert-ese: Profiling and Classification of Domain-Specific Language Variation in Architecture with Traditional Machine Learning and LLMs
%A Schacht, Carmen
%A Delucchi Danhier, Renate
%Y Alves, Diego
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Pagel, Janis
%Y Szpakowicz, Stan
%S Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-373-9
%F schacht-delucchi-danhier-2026-studying-expert
%X This study investigates how domain expertise shapes spontaneous oral language production, with a focus on architecture. Building on the ExpLay Corpus, which contains image descriptions by speakers with and without architectural training, we analyze linguistic variation by combining Profiling-UD and the DECAF framework. We extract a broad range of syntactic and morpho-syntactic features to build linguistic profiles for both groups and train classifiers to distinguish expert from non-expert productions. Two traditional machine learning models (logistic regression and SVM) are compared with a lightweight BiLSTM and two large language models (GliClass and LLaMA 2). While the expert and non-expert corpora diverge only subtly (pairwise Jensen–Shannon divergence (JSD)= 0.25), the BiLSTM using fastText embeddings achieves the highest F1-score (0.88), outperforming both traditional models and LLMs. This indicates that semantic representations are more predictive of domain variation than purely structural features and that smaller neural architectures generalize better on limited data. Overall, the findings provide empirical evidence that architectural expertise leaves measurable linguistic traces in spontaneous speech, supporting the Grammar of Space hypothesis.
%U https://aclanthology.org/2026.latechclfl-1.3/
%P 16-29
Markdown (Informal)
[Studying Expert-ese: Profiling and Classification of Domain-Specific Language Variation in Architecture with Traditional Machine Learning and LLMs](https://aclanthology.org/2026.latechclfl-1.3/) (Schacht & Delucchi Danhier, LaTeCH-CLfL 2026)
ACL