@inproceedings{prevot-etal-2025-segmenting,
title = "Segmenting a Large {F}rench Meeting Corpus into Elementary Discourse Units",
author = "Pr{\'e}vot, Laurent and
Bertrand, Roxane and
Hunter, Julie",
editor = "B{\'e}chet, Fr{\'e}d{\'e}ric and
Lef{\`e}vre, Fabrice and
Asher, Nicholas and
Kim, Seokhwan and
Merlin, Teva",
booktitle = "Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = aug,
year = "2025",
address = "Avignon, France",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sigdial-1.14/",
pages = "183--191",
abstract = "Despite growing interest in discourse-related tasks, the limited quantity and diversity of discourse-annotated data remain a major issue. Existing resources are largely based on written corpora, while spoken conversational genres are underrepresented. Although discourse segmentation into elementary discourse units (EDUs) is considered to be nearly solved for canonical written texts, conversational spontaneous speech transcripts present different challenges. In this paper, we introduce a large French corpus of segmented meeting dialogues, including 20 hours of manually transcribed and discourse-annotated conversations, and 80 hours of automatically transcribed and discourse-segmented data. We describe our annotation campaign, discuss inter-annotator agreement and segmentation guidelines, and present results from fine-tuning a model for EDU segmentation on this resource."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prevot-etal-2025-segmenting">
<titleInfo>
<title>Segmenting a Large French Meeting Corpus into Elementary Discourse Units</title>
</titleInfo>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Prévot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roxane</namePart>
<namePart type="family">Bertrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julie</namePart>
<namePart type="family">Hunter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrice</namePart>
<namePart type="family">Lefèvre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Asher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teva</namePart>
<namePart type="family">Merlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Avignon, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite growing interest in discourse-related tasks, the limited quantity and diversity of discourse-annotated data remain a major issue. Existing resources are largely based on written corpora, while spoken conversational genres are underrepresented. Although discourse segmentation into elementary discourse units (EDUs) is considered to be nearly solved for canonical written texts, conversational spontaneous speech transcripts present different challenges. In this paper, we introduce a large French corpus of segmented meeting dialogues, including 20 hours of manually transcribed and discourse-annotated conversations, and 80 hours of automatically transcribed and discourse-segmented data. We describe our annotation campaign, discuss inter-annotator agreement and segmentation guidelines, and present results from fine-tuning a model for EDU segmentation on this resource.</abstract>
<identifier type="citekey">prevot-etal-2025-segmenting</identifier>
<location>
<url>https://aclanthology.org/2025.sigdial-1.14/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>183</start>
<end>191</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Segmenting a Large French Meeting Corpus into Elementary Discourse Units
%A Prévot, Laurent
%A Bertrand, Roxane
%A Hunter, Julie
%Y Béchet, Frédéric
%Y Lefèvre, Fabrice
%Y Asher, Nicholas
%Y Kim, Seokhwan
%Y Merlin, Teva
%S Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2025
%8 August
%I Association for Computational Linguistics
%C Avignon, France
%F prevot-etal-2025-segmenting
%X Despite growing interest in discourse-related tasks, the limited quantity and diversity of discourse-annotated data remain a major issue. Existing resources are largely based on written corpora, while spoken conversational genres are underrepresented. Although discourse segmentation into elementary discourse units (EDUs) is considered to be nearly solved for canonical written texts, conversational spontaneous speech transcripts present different challenges. In this paper, we introduce a large French corpus of segmented meeting dialogues, including 20 hours of manually transcribed and discourse-annotated conversations, and 80 hours of automatically transcribed and discourse-segmented data. We describe our annotation campaign, discuss inter-annotator agreement and segmentation guidelines, and present results from fine-tuning a model for EDU segmentation on this resource.
%U https://aclanthology.org/2025.sigdial-1.14/
%P 183-191
Markdown (Informal)
[Segmenting a Large French Meeting Corpus into Elementary Discourse Units](https://aclanthology.org/2025.sigdial-1.14/) (Prévot et al., SIGDIAL 2025)
ACL