% Paper in the proceedings of the Workshop on Discourse Relation Parsing and
% Treebanking 2019; canonical record at https://aclanthology.org/W19-2714.
@inproceedings{bourgonje-schafer-2019-multi,
    title     = {Multi-lingual and Cross-genre Discourse Unit Segmentation},
    author    = {Bourgonje, Peter and
                 Sch{\"a}fer, Robin},
    editor    = {Zeldes, Amir and
                 Das, Debopam and
                 Galani, Erick Maziero and
                 Antonio, Juliano Desiderato and
                 Iruskieta, Mikel},
    booktitle = {Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019},
    month     = jun,
    year      = {2019},
    address   = {Minneapolis, MN},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W19-2714},
    doi       = {10.18653/v1/W19-2714},
    pages     = {105--114},
    abstract  = {We describe a series of experiments applied to data sets from different languages and genres annotated for coherence relations according to different theoretical frameworks. Specifically, we investigate the feasibility of a unified (theory-neutral) approach toward discourse segmentation; a process which divides a text into minimal discourse units that are involved in a coherence relation. We apply a RandomForest and an LSTM based approach for all data sets, and we improve over a simple baseline assuming simple sentence or clause-like segmentation. Performance however varies a lot depending on language, and more importantly genre, with f-scores ranging from 73.00 to 94.47.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bourgonje-schafer-2019-multi">
<titleInfo>
<title>Multi-lingual and Cross-genre Discourse Unit Segmentation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Bourgonje</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Schäfer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debopam</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erick</namePart>
<namePart type="given">Maziero</namePart>
<namePart type="family">Galani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juliano</namePart>
<namePart type="given">Desiderato</namePart>
<namePart type="family">Antonio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikel</namePart>
<namePart type="family">Iruskieta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, MN</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe a series of experiments applied to data sets from different languages and genres annotated for coherence relations according to different theoretical frameworks. Specifically, we investigate the feasibility of a unified (theory-neutral) approach toward discourse segmentation; a process which divides a text into minimal discourse units that are involved in a coherence relation. We apply a RandomForest and an LSTM based approach for all data sets, and we improve over a simple baseline assuming simple sentence or clause-like segmentation. Performance however varies a lot depending on language, and more importantly genre, with f-scores ranging from 73.00 to 94.47.</abstract>
<identifier type="citekey">bourgonje-schafer-2019-multi</identifier>
<identifier type="doi">10.18653/v1/W19-2714</identifier>
<location>
<url>https://aclanthology.org/W19-2714</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>105</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-lingual and Cross-genre Discourse Unit Segmentation
%A Bourgonje, Peter
%A Schäfer, Robin
%Y Zeldes, Amir
%Y Das, Debopam
%Y Galani, Erick Maziero
%Y Antonio, Juliano Desiderato
%Y Iruskieta, Mikel
%S Proceedings of the Workshop on Discourse Relation Parsing and Treebanking 2019
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, MN
%F bourgonje-schafer-2019-multi
%X We describe a series of experiments applied to data sets from different languages and genres annotated for coherence relations according to different theoretical frameworks. Specifically, we investigate the feasibility of a unified (theory-neutral) approach toward discourse segmentation; a process which divides a text into minimal discourse units that are involved in a coherence relation. We apply a RandomForest and an LSTM based approach for all data sets, and we improve over a simple baseline assuming simple sentence or clause-like segmentation. Performance however varies a lot depending on language, and more importantly genre, with f-scores ranging from 73.00 to 94.47.
%R 10.18653/v1/W19-2714
%U https://aclanthology.org/W19-2714
%U https://doi.org/10.18653/v1/W19-2714
%P 105-114
Markdown (Informal)
[Multi-lingual and Cross-genre Discourse Unit Segmentation](https://aclanthology.org/W19-2714) (Bourgonje & Schäfer, NAACL 2019)
ACL