@article{pery-woodley-etal-2017-corpus,
title = "A corpus-driven approach to discourse organisation: from cues to complex markers",
author = "P{\'e}ry-Woodley, Marie-Paule and
Ho-Dac, Lydia-Mai and
Rebeyrolle, Josette and
Tanguy, Ludovic and
Fabre, C{\'e}cile",
editor = "Stent, Amanda and
Taboada, Maite and
Fern{\'a}ndez, Raquel and
Traum, David and
Poesio, Massimo and
Eugenio, Barbara Di and
Stede, Manfred",
journal = "Dialogue {\&} Discourse",
volume = "8",
month = jan,
year = "2017",
address = "Bielefeld, Germany",
publisher = "University of Bielefeld",
url = "https://aclanthology.org/2017.dnd-8.13/",
doi = "10.5087/dad.2017.103",
pages = "66--105",
abstract = "This paper reports on an experiment implementing a data-intensive approach to discourse organisation. Its focus is on enumerative structures envisaged as a type of textual pattern in a sequentiality-oriented approach to discourse. On the basis of a large-scale annotation exercise calling upon automatic feature mark-up alongside manual annotation, we explore a method to identify complex discourse markers seen as configurations of cues. The presentation of the background to what is termed ``multi-level annotation'' is organised around four issues: linearity, complexity of discourse markers, top-down processing, granularity and the multi-level nature of discourse structures. In this context, enumerative structures seem to deserve scrutiny for a number of reasons: they are frequent structures appearing at different granularity levels, they are signalled by a variety of devices appearing to work together in complex ways, and they combine a textual role (discourse organisation) with an ideational role (categorisation). We describe the annotation procedure and experimental framework which resulted in nearly 1,000 enumerative structures being annotated in a diversified corpus of over 600,000 words. The results of two approaches to the rich data produced are then presented: firstly, a descriptive survey highlights considerable variation in length and composition, while showing enumerative structure to be a basic strategy resorted to in all three sub-corpora, and leads to a granularity-based typology of the annotated structures; secondly, recurrent cue configurations{---}our ``complex{~} markers''{---}are identified by the application of data mining methods. The paper ends with perspectives for further exploitation of the data, in particular with respect to the semantic characterisation of enumerative structures."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pery-woodley-etal-2017-corpus">
<titleInfo>
<title>A corpus-driven approach to discourse organisation: from cues to complex markers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Paule</namePart>
<namePart type="family">Péry-Woodley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lydia-Mai</namePart>
<namePart type="family">Ho-Dac</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josette</namePart>
<namePart type="family">Rebeyrolle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ludovic</namePart>
<namePart type="family">Tanguy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cécile</namePart>
<namePart type="family">Fabre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Dialogue &amp; Discourse</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>University of Bielefeld</publisher>
<place>
<placeTerm type="text">Bielefeld, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>This paper reports on an experiment implementing a data-intensive approach to discourse organisation. Its focus is on enumerative structures envisaged as a type of textual pattern in a sequentiality-oriented approach to discourse. On the basis of a large-scale annotation exercise calling upon automatic feature mark-up alongside manual annotation, we explore a method to identify complex discourse markers seen as configurations of cues. The presentation of the background to what is termed “multi-level annotation” is organised around four issues: linearity, complexity of discourse markers, top-down processing, granularity and the multi-level nature of discourse structures. In this context, enumerative structures seem to deserve scrutiny for a number of reasons: they are frequent structures appearing at different granularity levels, they are signalled by a variety of devices appearing to work together in complex ways, and they combine a textual role (discourse organisation) with an ideational role (categorisation). We describe the annotation procedure and experimental framework which resulted in nearly 1,000 enumerative structures being annotated in a diversified corpus of over 600,000 words. The results of two approaches to the rich data produced are then presented: firstly, a descriptive survey highlights considerable variation in length and composition, while showing enumerative structure to be a basic strategy resorted to in all three sub-corpora, and leads to a granularity-based typology of the annotated structures; secondly, recurrent cue configurations—our “complex markers”—are identified by the application of data mining methods. The paper ends with perspectives for further exploitation of the data, in particular with respect to the semantic characterisation of enumerative structures.</abstract>
<identifier type="citekey">pery-woodley-etal-2017-corpus</identifier>
<identifier type="doi">10.5087/dad.2017.103</identifier>
<location>
<url>https://aclanthology.org/2017.dnd-8.13/</url>
</location>
<part>
<date>2017-01</date>
<detail type="volume"><number>8</number></detail>
<extent unit="page">
<start>66</start>
<end>105</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T A corpus-driven approach to discourse organisation: from cues to complex markers
%A Péry-Woodley, Marie-Paule
%A Ho-Dac, Lydia-Mai
%A Rebeyrolle, Josette
%A Tanguy, Ludovic
%A Fabre, Cécile
%J Dialogue & Discourse
%D 2017
%8 January
%V 8
%I University of Bielefeld
%C Bielefeld, Germany
%F pery-woodley-etal-2017-corpus
%X This paper reports on an experiment implementing a data-intensive approach to discourse organisation. Its focus is on enumerative structures envisaged as a type of textual pattern in a sequentiality-oriented approach to discourse. On the basis of a large-scale annotation exercise calling upon automatic feature mark-up alongside manual annotation, we explore a method to identify complex discourse markers seen as configurations of cues. The presentation of the background to what is termed “multi-level annotation” is organised around four issues: linearity, complexity of discourse markers, top-down processing, granularity and the multi-level nature of discourse structures. In this context, enumerative structures seem to deserve scrutiny for a number of reasons: they are frequent structures appearing at different granularity levels, they are signalled by a variety of devices appearing to work together in complex ways, and they combine a textual role (discourse organisation) with an ideational role (categorisation). We describe the annotation procedure and experimental framework which resulted in nearly 1,000 enumerative structures being annotated in a diversified corpus of over 600,000 words. The results of two approaches to the rich data produced are then presented: firstly, a descriptive survey highlights considerable variation in length and composition, while showing enumerative structure to be a basic strategy resorted to in all three sub-corpora, and leads to a granularity-based typology of the annotated structures; secondly, recurrent cue configurations—our “complex markers”—are identified by the application of data mining methods. The paper ends with perspectives for further exploitation of the data, in particular with respect to the semantic characterisation of enumerative structures.
%R 10.5087/dad.2017.103
%U https://aclanthology.org/2017.dnd-8.13/
%U https://doi.org/10.5087/dad.2017.103
%P 66-105
Markdown (Informal)
[A corpus-driven approach to discourse organisation: from cues to complex markers](https://aclanthology.org/2017.dnd-8.13/) (Péry-Woodley et al., DND 2017)
ACL