@inproceedings{max-wisniewski-2010-mining,
title = "Mining Naturally-occurring Corrections and Paraphrases from {W}ikipedia{'}s Revision History",
author = "Max, Aur{\'e}lien and
Wisniewski, Guillaume",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/827_Paper.pdf",
abstract = "Naturally-occurring instances of linguistic phenomena are important both for training and for evaluating automatic text processing. When available in large quantities, they also prove interesting material for linguistic studies. In this article, we present WiCoPaCo (Wikipedia Correction and Paraphrase Corpus), a new freely-available resource built by automatically mining Wikipedias revision history. The WiCoPaCo corpus focuses on local modifications made by human revisors and include various types of corrections (such as spelling error or typographical corrections) and rewritings, which can be categorized broadly into meaning-preserving and meaning-altering revisions. We present an initial hand-built typology of these revisions, but the resource allows for any possible annotation scheme. We discuss the main motivations for building such a resource and describe the main technical details guiding its construction. We also present applications and data analysis on French and report initial results on spelling error correction and morphosyntactic rewriting. The WiCoPaCo corpus can be freely downloaded from \url{http://wicopaco.limsi.fr}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="max-wisniewski-2010-mining">
<titleInfo>
<title>Mining Naturally-occurring Corrections and Paraphrases from Wikipedia’s Revision History</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aurélien</namePart>
<namePart type="family">Max</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Wisniewski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Naturally-occurring instances of linguistic phenomena are important both for training and for evaluating automatic text processing. When available in large quantities, they also prove interesting material for linguistic studies. In this article, we present WiCoPaCo (Wikipedia Correction and Paraphrase Corpus), a new freely-available resource built by automatically mining Wikipedias revision history. The WiCoPaCo corpus focuses on local modifications made by human revisors and include various types of corrections (such as spelling error or typographical corrections) and rewritings, which can be categorized broadly into meaning-preserving and meaning-altering revisions. We present an initial hand-built typology of these revisions, but the resource allows for any possible annotation scheme. We discuss the main motivations for building such a resource and describe the main technical details guiding its construction. We also present applications and data analysis on French and report initial results on spelling error correction and morphosyntactic rewriting. The WiCoPaCo corpus can be freely downloaded from http://wicopaco.limsi.fr.</abstract>
<identifier type="citekey">max-wisniewski-2010-mining</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/827_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mining Naturally-occurring Corrections and Paraphrases from Wikipedia’s Revision History
%A Max, Aurélien
%A Wisniewski, Guillaume
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F max-wisniewski-2010-mining
%X Naturally-occurring instances of linguistic phenomena are important both for training and for evaluating automatic text processing. When available in large quantities, they also prove interesting material for linguistic studies. In this article, we present WiCoPaCo (Wikipedia Correction and Paraphrase Corpus), a new freely-available resource built by automatically mining Wikipedias revision history. The WiCoPaCo corpus focuses on local modifications made by human revisors and include various types of corrections (such as spelling error or typographical corrections) and rewritings, which can be categorized broadly into meaning-preserving and meaning-altering revisions. We present an initial hand-built typology of these revisions, but the resource allows for any possible annotation scheme. We discuss the main motivations for building such a resource and describe the main technical details guiding its construction. We also present applications and data analysis on French and report initial results on spelling error correction and morphosyntactic rewriting. The WiCoPaCo corpus can be freely downloaded from http://wicopaco.limsi.fr.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/827_Paper.pdf
Markdown (Informal)
[Mining Naturally-occurring Corrections and Paraphrases from Wikipedia’s Revision History](http://www.lrec-conf.org/proceedings/lrec2010/pdf/827_Paper.pdf) (Max & Wisniewski, LREC 2010)
ACL