% Conference paper from the Proceedings of the Workshop on Discourse Relation
% Parsing and Treebanking 2019 (ACL Anthology ID W19-2715).
% The system name "ToNy" is brace-protected as a whole word so that styles
% applying sentence casing to titles keep its mixed capitalisation.
@inproceedings{muller-etal-2019-tony,
  title     = {{ToNy}: Contextual embeddings for accurate multilingual discourse segmentation of full documents},
  author    = {Muller, Philippe and
               Braud, Chlo{\'e} and
               Morey, Mathieu},
  editor    = {Zeldes, Amir and
               Das, Debopam and
               Galani, Erick Maziero and
               Antonio, Juliano Desiderato and
               Iruskieta, Mikel},
  booktitle = {Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, MN},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-2715},
  doi       = {10.18653/v1/W19-2715},
  pages     = {115--124},
  abstract  = {Segmentation is the first step in building practical discourse parsers, and is often neglected in discourse parsing studies. The goal is to identify the minimal spans of text to be linked by discourse relations, or to isolate explicit marking of discourse relations. Existing systems on English report F1 scores as high as 95{\%}, but they generally assume gold sentence boundaries and are restricted to English newswire texts annotated within the RST framework. This article presents a generic approach and a system, ToNy, a discourse segmenter developed for the DisRPT shared task where multiple discourse representation schemes, languages and domains are represented. In our experiments, we found that a straightforward sequence prediction architecture with pretrained contextual embeddings is sufficient to reach performance levels comparable to existing systems, when separately trained on each corpus. We report performance between 81{\%} and 96{\%} in F1 score. We also observed that discourse segmentation models only display a moderate generalization capability, even within the same language and discourse representation scheme.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey muller-etal-2019-tony. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="muller-etal-2019-tony">
    <titleInfo>
      <title>ToNy: Contextual embeddings for accurate multilingual discourse segmentation of full documents</title>
    </titleInfo>
    <!-- Authors of the paper, in publication order. -->
    <name type="personal">
      <namePart type="given">Philippe</namePart>
      <namePart type="family">Muller</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chloé</namePart>
      <namePart type="family">Braud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mathieu</namePart>
      <namePart type="family">Morey</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Amir</namePart>
        <namePart type="family">Zeldes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debopam</namePart>
        <namePart type="family">Das</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Erick</namePart>
        <namePart type="given">Maziero</namePart>
        <namePart type="family">Galani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juliano</namePart>
        <namePart type="given">Desiderato</namePart>
        <namePart type="family">Antonio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mikel</namePart>
        <namePart type="family">Iruskieta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, MN</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Segmentation is the first step in building practical discourse parsers, and is often neglected in discourse parsing studies. The goal is to identify the minimal spans of text to be linked by discourse relations, or to isolate explicit marking of discourse relations. Existing systems on English report F1 scores as high as 95%, but they generally assume gold sentence boundaries and are restricted to English newswire texts annotated within the RST framework. This article presents a generic approach and a system, ToNy, a discourse segmenter developed for the DisRPT shared task where multiple discourse representation schemes, languages and domains are represented. In our experiments, we found that a straightforward sequence prediction architecture with pretrained contextual embeddings is sufficient to reach performance levels comparable to existing systems, when separately trained on each corpus. We report performance between 81% and 96% in F1 score. We also observed that discourse segmentation models only display a moderate generalization capability, even within the same language and discourse representation scheme.</abstract>
    <!-- Identifiers and access location for the paper. -->
    <identifier type="citekey">muller-etal-2019-tony</identifier>
    <identifier type="doi">10.18653/v1/W19-2715</identifier>
    <location>
      <url>https://aclanthology.org/W19-2715</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>115</start>
        <end>124</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ToNy: Contextual embeddings for accurate multilingual discourse segmentation of full documents
%A Muller, Philippe
%A Braud, Chloé
%A Morey, Mathieu
%Y Zeldes, Amir
%Y Das, Debopam
%Y Galani, Erick Maziero
%Y Antonio, Juliano Desiderato
%Y Iruskieta, Mikel
%S Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, MN
%F muller-etal-2019-tony
%X Segmentation is the first step in building practical discourse parsers, and is often neglected in discourse parsing studies. The goal is to identify the minimal spans of text to be linked by discourse relations, or to isolate explicit marking of discourse relations. Existing systems on English report F1 scores as high as 95%, but they generally assume gold sentence boundaries and are restricted to English newswire texts annotated within the RST framework. This article presents a generic approach and a system, ToNy, a discourse segmenter developed for the DisRPT shared task where multiple discourse representation schemes, languages and domains are represented. In our experiments, we found that a straightforward sequence prediction architecture with pretrained contextual embeddings is sufficient to reach performance levels comparable to existing systems, when separately trained on each corpus. We report performance between 81% and 96% in F1 score. We also observed that discourse segmentation models only display a moderate generalization capability, even within the same language and discourse representation scheme.
%R 10.18653/v1/W19-2715
%U https://aclanthology.org/W19-2715
%U https://doi.org/10.18653/v1/W19-2715
%P 115-124
Markdown (Informal)
[ToNy: Contextual embeddings for accurate multilingual discourse segmentation of full documents](https://aclanthology.org/W19-2715) (Muller et al., NAACL 2019)
ACL