@inproceedings{schmuck-etal-2026-cross,
title = "Cross-Linguistic Situation Entity Segmentation for Discourse Analysis in Diachronic {E}nglish and {G}erman Text",
author = {Schm{\"u}ck, Hanna and
Urban, Veronika and
Kr{\"u}ckl, Xaver and
Zeman, Sonja and
Claridge, Claudia and
Friedrich, Annemarie},
editor = "Liu, Yang Janet and
Gessler, Luke",
booktitle = "Proceedings of the 20th Linguistic Annotation Workshop ({LAW} {XX})",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.law-main.8/",
pages = "95--112",
ISBN = "979-8-89176-404-0",
abstract = "Situation Entity (SE) segmentation identifies clause-like discourse units focusing on verb constellations. While SE segmentation has been applied to contemporary English as a subtask of SE annotation, systematic guidelines for syntactically ambiguous constructions remain underspecified. We present principled SE segmentation guidelines for contemporary and historical varieties of English and German. Our inter-annotator agreement studies on Late Modern English (1700{--}1900) and New High German (1650{--}1900) corpora demonstrate substantial agreement. Using the existing SitEnt corpus in contemporary English, we implement a new automatic segmenter based on XLM-RoBERTa. Our evaluation examines cross-variety and cross-lingual generalization, demonstrating challenges both for human annotation efforts and in transferring segmenters trained on contemporary English to historical varieties. Our code and data are publicly available at https://github.com/coling-unia/sitent-segmenter-law2026."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schmuck-etal-2026-cross">
<titleInfo>
<title>Cross-Linguistic Situation Entity Segmentation for Discourse Analysis in Diachronic English and German Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Schmück</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronika</namePart>
<namePart type="family">Urban</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xaver</namePart>
<namePart type="family">Krückl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sonja</namePart>
<namePart type="family">Zeman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Claridge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Linguistic Annotation Workshop (LAW XX)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="given">Janet</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Gessler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-404-0</identifier>
</relatedItem>
<abstract>Situation Entity (SE) segmentation identifies clause-like discourse units focusing on verb constellations. While SE segmentation has been applied to contemporary English as a subtask of SE annotation, systematic guidelines for syntactically ambiguous constructions remain underspecified. We present principled SE segmentation guidelines for contemporary and historical varieties of English and German. Our inter-annotator agreement studies on Late Modern English (1700–1900) and New High German (1650–1900) corpora demonstrate substantial agreement. Using the existing SitEnt corpus in contemporary English, we implement a new automatic segmenter based on XLM-RoBERTa. Our evaluation examines cross-variety and cross-lingual generalization, demonstrating challenges both for human annotation efforts and in transferring segmenters trained on contemporary English to historical varieties. Our code and data are publicly available at https://github.com/coling-unia/sitent-segmenter-law2026.</abstract>
<identifier type="citekey">schmuck-etal-2026-cross</identifier>
<location>
<url>https://aclanthology.org/2026.law-main.8/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>95</start>
<end>112</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-Linguistic Situation Entity Segmentation for Discourse Analysis in Diachronic English and German Text
%A Schmück, Hanna
%A Urban, Veronika
%A Krückl, Xaver
%A Zeman, Sonja
%A Claridge, Claudia
%A Friedrich, Annemarie
%Y Liu, Yang Janet
%Y Gessler, Luke
%S Proceedings of the 20th Linguistic Annotation Workshop (LAW XX)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-404-0
%F schmuck-etal-2026-cross
%X Situation Entity (SE) segmentation identifies clause-like discourse units focusing on verb constellations. While SE segmentation has been applied to contemporary English as a subtask of SE annotation, systematic guidelines for syntactically ambiguous constructions remain underspecified. We present principled SE segmentation guidelines for contemporary and historical varieties of English and German. Our inter-annotator agreement studies on Late Modern English (1700–1900) and New High German (1650–1900) corpora demonstrate substantial agreement. Using the existing SitEnt corpus in contemporary English, we implement a new automatic segmenter based on XLM-RoBERTa. Our evaluation examines cross-variety and cross-lingual generalization, demonstrating challenges both for human annotation efforts and in transferring segmenters trained on contemporary English to historical varieties. Our code and data are publicly available at https://github.com/coling-unia/sitent-segmenter-law2026.
%U https://aclanthology.org/2026.law-main.8/
%P 95-112
Markdown (Informal)
[Cross-Linguistic Situation Entity Segmentation for Discourse Analysis in Diachronic English and German Text](https://aclanthology.org/2026.law-main.8/) (Schmück et al., LAW 2026)
ACL