@inproceedings{botha-etal-2018-learning,
    title = "Learning To Split and Rephrase From {W}ikipedia Edit History",
    author = "Botha, Jan A.  and
      Faruqui, Manaal  and
      Alex, John  and
      Baldridge, Jason  and
      Das, Dipanjan",
    editor = "Riloff, Ellen  and
      Chiang, David  and
      Hockenmaier, Julia  and
      Tsujii, Jun{'}ichi",
    booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
    month = oct # "-" # nov,
    year = "2018",
    address = "Brussels, Belgium",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/D18-1080/",
    doi = "10.18653/v1/D18-1080",
    pages = "732--737",
    abstract = "Split and rephrase is the task of breaking down a sentence into shorter ones that together convey the same meaning. We extract a rich new dataset for this task by mining Wikipedia{'}s edit history: WikiSplit contains one million naturally occurring sentence rewrites, providing sixty times more distinct split examples and a ninety times larger vocabulary than the WebSplit corpus introduced by Narayan et al. (2017) as a benchmark for this task. Incorporating WikiSplit as training data produces a model with qualitatively better predictions that score 32 BLEU points above the prior best result on the WebSplit benchmark."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="botha-etal-2018-learning">
    <titleInfo>
        <title>Learning To Split and Rephrase From Wikipedia Edit History</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="given">A</namePart>
        <namePart type="family">Botha</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Manaal</namePart>
        <namePart type="family">Faruqui</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="family">Alex</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jason</namePart>
        <namePart type="family">Baldridge</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Dipanjan</namePart>
        <namePart type="family">Das</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2018-oct-nov</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Ellen</namePart>
            <namePart type="family">Riloff</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">David</namePart>
            <namePart type="family">Chiang</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Julia</namePart>
            <namePart type="family">Hockenmaier</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jun’ichi</namePart>
            <namePart type="family">Tsujii</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Brussels, Belgium</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Split and rephrase is the task of breaking down a sentence into shorter ones that together convey the same meaning. We extract a rich new dataset for this task by mining Wikipedia’s edit history: WikiSplit contains one million naturally occurring sentence rewrites, providing sixty times more distinct split examples and a ninety times larger vocabulary than the WebSplit corpus introduced by Narayan et al. (2017) as a benchmark for this task. Incorporating WikiSplit as training data produces a model with qualitatively better predictions that score 32 BLEU points above the prior best result on the WebSplit benchmark.</abstract>
    <identifier type="citekey">botha-etal-2018-learning</identifier>
    <identifier type="doi">10.18653/v1/D18-1080</identifier>
    <location>
        <url>https://aclanthology.org/D18-1080/</url>
    </location>
    <part>
        <date>2018-oct-nov</date>
        <extent unit="page">
            <start>732</start>
            <end>737</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning To Split and Rephrase From Wikipedia Edit History
%A Botha, Jan A.
%A Faruqui, Manaal
%A Alex, John
%A Baldridge, Jason
%A Das, Dipanjan
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F botha-etal-2018-learning
%X Split and rephrase is the task of breaking down a sentence into shorter ones that together convey the same meaning. We extract a rich new dataset for this task by mining Wikipedia’s edit history: WikiSplit contains one million naturally occurring sentence rewrites, providing sixty times more distinct split examples and a ninety times larger vocabulary than the WebSplit corpus introduced by Narayan et al. (2017) as a benchmark for this task. Incorporating WikiSplit as training data produces a model with qualitatively better predictions that score 32 BLEU points above the prior best result on the WebSplit benchmark.
%R 10.18653/v1/D18-1080
%U https://aclanthology.org/D18-1080/
%U https://doi.org/10.18653/v1/D18-1080
%P 732-737
Markdown (Informal)
[Learning To Split and Rephrase From Wikipedia Edit History](https://aclanthology.org/D18-1080/) (Botha et al., EMNLP 2018)
ACL
- Jan A. Botha, Manaal Faruqui, John Alex, Jason Baldridge, and Dipanjan Das. 2018. Learning To Split and Rephrase From Wikipedia Edit History. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 732–737, Brussels, Belgium. Association for Computational Linguistics.