@inproceedings{mohammadi-etal-2020-creation,
title = "On the Creation of a Corpus for Coherence Evaluation of Discursive Units",
author = "Mohammadi, Elham and
Beiko, Timothe and
Kosseim, Leila",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.134",
pages = "1067--1072",
abstract = "In this paper, we report on our experiments towards the creation of a corpus for coherence evaluation. Most corpora for textual coherence evaluation are composed of randomly shuffled sentences that focus on sentence ordering, regardless of whether the sentences were originally related by a discourse relation. To the best of our knowledge, no publicly available corpus has been designed specifically for the evaluation of coherence of known discursive units. In this paper, we focus on coherence modeling at the intra-discursive level and describe our approach to build a corpus of incoherent pairs of sentences. We experimented with a variety of corruption strategies to create synthetic incoherent pairs of discourse arguments from coherent ones. Using discourse argument pairs from the Penn Discourse Tree Bank, we generate incoherent discourse argument pairs, by swapping either their discourse connective or a discourse argument. To evaluate how incoherent the generated corpora are, we use a convolutional neural network to try to distinguish the original pairs from the corrupted ones. Results of the classifier as well as a manual inspection of the corpora show that generating such corpora is still a challenge as the generated instances are clearly not {``}incoherent enough{''}, indicating that more effort should be spent on developing more robust ways of generating incoherent corpora.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mohammadi-etal-2020-creation">
<titleInfo>
<title>On the Creation of a Corpus for Coherence Evaluation of Discursive Units</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elham</namePart>
<namePart type="family">Mohammadi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothe</namePart>
<namePart type="family">Beiko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leila</namePart>
<namePart type="family">Kosseim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>In this paper, we report on our experiments towards the creation of a corpus for coherence evaluation. Most corpora for textual coherence evaluation are composed of randomly shuffled sentences that focus on sentence ordering, regardless of whether the sentences were originally related by a discourse relation. To the best of our knowledge, no publicly available corpus has been designed specifically for the evaluation of coherence of known discursive units. In this paper, we focus on coherence modeling at the intra-discursive level and describe our approach to build a corpus of incoherent pairs of sentences. We experimented with a variety of corruption strategies to create synthetic incoherent pairs of discourse arguments from coherent ones. Using discourse argument pairs from the Penn Discourse Tree Bank, we generate incoherent discourse argument pairs, by swapping either their discourse connective or a discourse argument. To evaluate how incoherent the generated corpora are, we use a convolutional neural network to try to distinguish the original pairs from the corrupted ones. Results of the classifier as well as a manual inspection of the corpora show that generating such corpora is still a challenge as the generated instances are clearly not “incoherent enough”, indicating that more effort should be spent on developing more robust ways of generating incoherent corpora.</abstract>
<identifier type="citekey">mohammadi-etal-2020-creation</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.134</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>1067</start>
<end>1072</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Creation of a Corpus for Coherence Evaluation of Discursive Units
%A Mohammadi, Elham
%A Beiko, Timothe
%A Kosseim, Leila
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F mohammadi-etal-2020-creation
%X In this paper, we report on our experiments towards the creation of a corpus for coherence evaluation. Most corpora for textual coherence evaluation are composed of randomly shuffled sentences that focus on sentence ordering, regardless of whether the sentences were originally related by a discourse relation. To the best of our knowledge, no publicly available corpus has been designed specifically for the evaluation of coherence of known discursive units. In this paper, we focus on coherence modeling at the intra-discursive level and describe our approach to build a corpus of incoherent pairs of sentences. We experimented with a variety of corruption strategies to create synthetic incoherent pairs of discourse arguments from coherent ones. Using discourse argument pairs from the Penn Discourse Tree Bank, we generate incoherent discourse argument pairs, by swapping either their discourse connective or a discourse argument. To evaluate how incoherent the generated corpora are, we use a convolutional neural network to try to distinguish the original pairs from the corrupted ones. Results of the classifier as well as a manual inspection of the corpora show that generating such corpora is still a challenge as the generated instances are clearly not “incoherent enough”, indicating that more effort should be spent on developing more robust ways of generating incoherent corpora.
%U https://aclanthology.org/2020.lrec-1.134
%P 1067-1072
Markdown (Informal)
[On the Creation of a Corpus for Coherence Evaluation of Discursive Units](https://aclanthology.org/2020.lrec-1.134) (Mohammadi et al., LREC 2020)
ACL