@inproceedings{barron-cedeno-etal-2010-corpus,
title = "Corpus and Evaluation Measures for Automatic Plagiarism Detection",
author = "Barr{\'o}n-Cede{\~n}o, Alberto and
Potthast, Martin and
Rosso, Paolo and
Stein, Benno",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/35_Paper.pdf",
abstract = "The simple access to texts on digital libraries and the World Wide Web has led to an increased number of plagiarism cases in recent years, which renders manual plagiarism detection infeasible at large. Various methods for automatic plagiarism detection have been developed whose objective is to assist human experts in the analysis of documents for plagiarism. The methods can be divided into two main approaches: intrinsic and external. Unlike other tasks in natural language processing and information retrieval, it is not possible to publish a collection of real plagiarism cases for evaluation purposes since they cannot be properly anonymized. Therefore, current evaluations found in the literature are incomparable and, very often not even reproducible. Our contribution in this respect is a newly developed large-scale corpus of artificial plagiarism useful for the evaluation of intrinsic as well as external plagiarism detection. Additionally, new detection performance measures tailored to the evaluation of plagiarism detection algorithms are proposed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barron-cedeno-etal-2010-corpus">
<titleInfo>
<title>Corpus and Evaluation Measures for Automatic Plagiarism Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Barrón-Cedeño</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Potthast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paolo</namePart>
<namePart type="family">Rosso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Stein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The simple access to texts on digital libraries and the World Wide Web has led to an increased number of plagiarism cases in recent years, which renders manual plagiarism detection infeasible at large. Various methods for automatic plagiarism detection have been developed whose objective is to assist human experts in the analysis of documents for plagiarism. The methods can be divided into two main approaches: intrinsic and external. Unlike other tasks in natural language processing and information retrieval, it is not possible to publish a collection of real plagiarism cases for evaluation purposes since they cannot be properly anonymized. Therefore, current evaluations found in the literature are incomparable and, very often not even reproducible. Our contribution in this respect is a newly developed large-scale corpus of artificial plagiarism useful for the evaluation of intrinsic as well as external plagiarism detection. Additionally, new detection performance measures tailored to the evaluation of plagiarism detection algorithms are proposed.</abstract>
<identifier type="citekey">barron-cedeno-etal-2010-corpus</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/35_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus and Evaluation Measures for Automatic Plagiarism Detection
%A Barrón-Cedeño, Alberto
%A Potthast, Martin
%A Rosso, Paolo
%A Stein, Benno
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F barron-cedeno-etal-2010-corpus
%X The simple access to texts on digital libraries and the World Wide Web has led to an increased number of plagiarism cases in recent years, which renders manual plagiarism detection infeasible at large. Various methods for automatic plagiarism detection have been developed whose objective is to assist human experts in the analysis of documents for plagiarism. The methods can be divided into two main approaches: intrinsic and external. Unlike other tasks in natural language processing and information retrieval, it is not possible to publish a collection of real plagiarism cases for evaluation purposes since they cannot be properly anonymized. Therefore, current evaluations found in the literature are incomparable and, very often not even reproducible. Our contribution in this respect is a newly developed large-scale corpus of artificial plagiarism useful for the evaluation of intrinsic as well as external plagiarism detection. Additionally, new detection performance measures tailored to the evaluation of plagiarism detection algorithms are proposed.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/35_Paper.pdf
Markdown (Informal)
[Corpus and Evaluation Measures for Automatic Plagiarism Detection](http://www.lrec-conf.org/proceedings/lrec2010/pdf/35_Paper.pdf) (Barrón-Cedeño et al., LREC 2010)
ACL