@inproceedings{belyy-etal-2018-improved,
title = "Improved Evaluation Framework for Complex Plagiarism Detection",
author = "Belyy, Anton and
Dubova, Marina and
Nekrasov, Dmitry",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-2026",
doi = "10.18653/v1/P18-2026",
pages = "157--162",
abstract = "Plagiarism is a major issue in science and education. Complex plagiarism, such as plagiarism of ideas, is hard to detect, and therefore it is especially important to track improvement of methods correctly. In this paper, we study the performance of plagdet, the main measure for plagiarim detection, on manually paraphrased datasets (such as PAN Summary). We reveal its fallibility under certain conditions and propose an evaluation framework with normalization of inner terms, which is resilient to the dataset imbalance. We conclude with the experimental justification of the proposed measure. The implementation of the new framework is made publicly available as a Github repository.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="belyy-etal-2018-improved">
<titleInfo>
<title>Improved Evaluation Framework for Complex Plagiarism Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anton</namePart>
<namePart type="family">Belyy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Dubova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Nekrasov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Plagiarism is a major issue in science and education. Complex plagiarism, such as plagiarism of ideas, is hard to detect, and therefore it is especially important to track improvement of methods correctly. In this paper, we study the performance of plagdet, the main measure for plagiarim detection, on manually paraphrased datasets (such as PAN Summary). We reveal its fallibility under certain conditions and propose an evaluation framework with normalization of inner terms, which is resilient to the dataset imbalance. We conclude with the experimental justification of the proposed measure. The implementation of the new framework is made publicly available as a Github repository.</abstract>
<identifier type="citekey">belyy-etal-2018-improved</identifier>
<identifier type="doi">10.18653/v1/P18-2026</identifier>
<location>
<url>https://aclanthology.org/P18-2026</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>157</start>
<end>162</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improved Evaluation Framework for Complex Plagiarism Detection
%A Belyy, Anton
%A Dubova, Marina
%A Nekrasov, Dmitry
%Y Gurevych, Iryna
%Y Miyao, Yusuke
%S Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F belyy-etal-2018-improved
%X Plagiarism is a major issue in science and education. Complex plagiarism, such as plagiarism of ideas, is hard to detect, and therefore it is especially important to track improvement of methods correctly. In this paper, we study the performance of plagdet, the main measure for plagiarim detection, on manually paraphrased datasets (such as PAN Summary). We reveal its fallibility under certain conditions and propose an evaluation framework with normalization of inner terms, which is resilient to the dataset imbalance. We conclude with the experimental justification of the proposed measure. The implementation of the new framework is made publicly available as a Github repository.
%R 10.18653/v1/P18-2026
%U https://aclanthology.org/P18-2026
%U https://doi.org/10.18653/v1/P18-2026
%P 157-162
Markdown (Informal)
[Improved Evaluation Framework for Complex Plagiarism Detection](https://aclanthology.org/P18-2026) (Belyy et al., ACL 2018)
ACL
- Anton Belyy, Marina Dubova, and Dmitry Nekrasov. 2018. Improved Evaluation Framework for Complex Plagiarism Detection. In Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 157–162, Melbourne, Australia. Association for Computational Linguistics.