@inproceedings{hewett-stede-2025-disagreements,
title = "Disagreements in analyses of rhetorical text structure: A new dataset and first analyses",
author = "Hewett, Freya and
Stede, Manfred",
editor = "Peng, Siyao and
Rehbein, Ines",
booktitle = "Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.law-1.3/",
doi = "10.18653/v1/2025.law-1.3",
pages = "35--47",
ISBN = "979-8-89176-262-6",
abstract = "Discourse structure annotation is known to involve a high level of subjectivity, which often results in low inter-annotator agreement. In this paper, we focus on ``legitimate disagreements'', by which we refer to multiple valid annotations for a text or text segment. We provide a new dataset of English and German texts, where each text comes with two parallel analyses (both done by well-trained annotators) in the framework of Rhetorical Structure Theory. Using the RST Tace tool, we build a list of all conflicting annotation decisions and present some statistics for the corpus. Thereafter, we undertake a qualitative analysis of the disagreements and propose a typology of underlying reasons. From this we derive the need to differentiate two kinds of ambiguities in RST annotation: those that result from inherent ``everyday'' linguistic ambiguity, and those that arise from specifications in the theory and/or the annotation schemes."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hewett-stede-2025-disagreements">
<titleInfo>
<title>Disagreements in analyses of rhetorical text structure: A new dataset and first analyses</title>
</titleInfo>
<name type="personal">
<namePart type="given">Freya</namePart>
<namePart type="family">Hewett</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manfred</namePart>
<namePart type="family">Stede</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Siyao</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Rehbein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-262-6</identifier>
</relatedItem>
<abstract>Discourse structure annotation is known to involve a high level of subjectivity, which often results in low inter-annotator agreement. In this paper, we focus on “legitimate disagreements”, by which we refer to multiple valid annotations for a text or text segment. We provide a new dataset of English and German texts, where each text comes with two parallel analyses (both done by well-trained annotators) in the framework of Rhetorical Structure Theory. Using the RST Tace tool, we build a list of all conflicting annotation decisions and present some statistics for the corpus. Thereafter, we undertake a qualitative analysis of the disagreements and propose a typology of underlying reasons. From this we derive the need to differentiate two kinds of ambiguities in RST annotation: those that result from inherent “everyday” linguistic ambiguity, and those that arise from specifications in the theory and/or the annotation schemes.</abstract>
<identifier type="citekey">hewett-stede-2025-disagreements</identifier>
<identifier type="doi">10.18653/v1/2025.law-1.3</identifier>
<location>
<url>https://aclanthology.org/2025.law-1.3/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>35</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Disagreements in analyses of rhetorical text structure: A new dataset and first analyses
%A Hewett, Freya
%A Stede, Manfred
%Y Peng, Siyao
%Y Rehbein, Ines
%S Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-262-6
%F hewett-stede-2025-disagreements
%X Discourse structure annotation is known to involve a high level of subjectivity, which often results in low inter-annotator agreement. In this paper, we focus on “legitimate disagreements”, by which we refer to multiple valid annotations for a text or text segment. We provide a new dataset of English and German texts, where each text comes with two parallel analyses (both done by well-trained annotators) in the framework of Rhetorical Structure Theory. Using the RST Tace tool, we build a list of all conflicting annotation decisions and present some statistics for the corpus. Thereafter, we undertake a qualitative analysis of the disagreements and propose a typology of underlying reasons. From this we derive the need to differentiate two kinds of ambiguities in RST annotation: those that result from inherent “everyday” linguistic ambiguity, and those that arise from specifications in the theory and/or the annotation schemes.
%R 10.18653/v1/2025.law-1.3
%U https://aclanthology.org/2025.law-1.3/
%U https://doi.org/10.18653/v1/2025.law-1.3
%P 35-47
Markdown (Informal)
[Disagreements in analyses of rhetorical text structure: A new dataset and first analyses](https://aclanthology.org/2025.law-1.3/) (Hewett & Stede, LAW 2025)
ACL