@inproceedings{niklaus-etal-2019-minwikisplit,
title = "{M}in{W}iki{S}plit: A Sentence Splitting Corpus with Minimal Propositions",
author = "Niklaus, Christina and
Freitas, Andr{\'e} and
Handschuh, Siegfried",
editor = "van Deemter, Kees and
Lin, Chenghua and
Takamura, Hiroya",
booktitle = "Proceedings of the 12th International Conference on Natural Language Generation",
month = oct # "–" # nov,
year = "2019",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-8615/",
doi = "10.18653/v1/W19-8615",
pages = "118--123",
abstract = "We compiled a new sentence splitting corpus that is composed of 203K pairs of aligned complex source and simplified target sentences. Contrary to previously proposed text simplification corpora, which contain only a small number of split examples, we present a dataset where each input sentence is broken down into a set of minimal propositions, i.e. a sequence of sound, self-contained utterances with each of them presenting a minimal semantic unit that cannot be further decomposed into meaningful propositions. This corpus is useful for developing sentence splitting approaches that learn how to transform sentences with a complex linguistic structure into a fine-grained representation of short sentences that present a simple and more regular structure which is easier to process for downstream applications and thus facilitates and improves their performance."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="niklaus-etal-2019-minwikisplit">
<titleInfo>
<title>MinWikiSplit: A Sentence Splitting Corpus with Minimal Propositions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christina</namePart>
<namePart type="family">Niklaus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siegfried</namePart>
<namePart type="family">Handschuh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-oct–nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kees</namePart>
<namePart type="family">van Deemter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenghua</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We compiled a new sentence splitting corpus that is composed of 203K pairs of aligned complex source and simplified target sentences. Contrary to previously proposed text simplification corpora, which contain only a small number of split examples, we present a dataset where each input sentence is broken down into a set of minimal propositions, i.e. a sequence of sound, self-contained utterances with each of them presenting a minimal semantic unit that cannot be further decomposed into meaningful propositions. This corpus is useful for developing sentence splitting approaches that learn how to transform sentences with a complex linguistic structure into a fine-grained representation of short sentences that present a simple and more regular structure which is easier to process for downstream applications and thus facilitates and improves their performance.</abstract>
<identifier type="citekey">niklaus-etal-2019-minwikisplit</identifier>
<identifier type="doi">10.18653/v1/W19-8615</identifier>
<location>
<url>https://aclanthology.org/W19-8615/</url>
</location>
<part>
<date>2019-oct–nov</date>
<extent unit="page">
<start>118</start>
<end>123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MinWikiSplit: A Sentence Splitting Corpus with Minimal Propositions
%A Niklaus, Christina
%A Freitas, André
%A Handschuh, Siegfried
%Y van Deemter, Kees
%Y Lin, Chenghua
%Y Takamura, Hiroya
%S Proceedings of the 12th International Conference on Natural Language Generation
%D 2019
%8 oct–nov
%I Association for Computational Linguistics
%C Tokyo, Japan
%F niklaus-etal-2019-minwikisplit
%X We compiled a new sentence splitting corpus that is composed of 203K pairs of aligned complex source and simplified target sentences. Contrary to previously proposed text simplification corpora, which contain only a small number of split examples, we present a dataset where each input sentence is broken down into a set of minimal propositions, i.e. a sequence of sound, self-contained utterances with each of them presenting a minimal semantic unit that cannot be further decomposed into meaningful propositions. This corpus is useful for developing sentence splitting approaches that learn how to transform sentences with a complex linguistic structure into a fine-grained representation of short sentences that present a simple and more regular structure which is easier to process for downstream applications and thus facilitates and improves their performance.
%R 10.18653/v1/W19-8615
%U https://aclanthology.org/W19-8615/
%U https://doi.org/10.18653/v1/W19-8615
%P 118-123
Markdown (Informal)
[MinWikiSplit: A Sentence Splitting Corpus with Minimal Propositions](https://aclanthology.org/W19-8615/) (Niklaus et al., INLG 2019)
ACL