@inproceedings{wang-etal-2019-application,
title = "Application of an Automatic Plagiarism Detection System in a Large-scale Assessment of {E}nglish Speaking Proficiency",
author = "Wang, Xinhao and
Evanini, Keelan and
Mulholland, Matthew and
Qian, Yao and
Bruno, James V.",
editor = "Yannakoudakis, Helen and
Kochmar, Ekaterina and
Leacock, Claudia and
Madnani, Nitin and
Pil{\'a}n, Ildik{\'o} and
Zesch, Torsten",
booktitle = "Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4445",
doi = "10.18653/v1/W19-4445",
pages = "435--443",
abstract = "This study aims to build an automatic system for the detection of plagiarized spoken responses in the context of an assessment of English speaking proficiency for non-native speakers. Classification models were trained to distinguish between plagiarized and non-plagiarized responses with two different types of features: text-to-text content similarity measures, which are commonly used in the task of plagiarism detection for written documents, and speaking proficiency measures, which were specifically designed for spontaneous speech and extracted using an automated speech scoring system. The experiments were first conducted on a large data set drawn from an operational English proficiency assessment across multiple years, and the best classifier on this heavily imbalanced data set resulted in an F1-score of 0.761 on the plagiarized class. This system was then validated on operational responses collected from a single administration of the assessment and achieved a recall of 0.897. The results indicate that the proposed system can potentially be used to improve the validity of both human and automated assessment of non-native spoken English.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2019-application">
<titleInfo>
<title>Application of an Automatic Plagiarism Detection System in a Large-scale Assessment of English Speaking Proficiency</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xinhao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keelan</namePart>
<namePart type="family">Evanini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Mulholland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yao</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Bruno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Yannakoudakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Leacock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Madnani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ildikó</namePart>
<namePart type="family">Pilán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Torsten</namePart>
<namePart type="family">Zesch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study aims to build an automatic system for the detection of plagiarized spoken responses in the context of an assessment of English speaking proficiency for non-native speakers. Classification models were trained to distinguish between plagiarized and non-plagiarized responses with two different types of features: text-to-text content similarity measures, which are commonly used in the task of plagiarism detection for written documents, and speaking proficiency measures, which were specifically designed for spontaneous speech and extracted using an automated speech scoring system. The experiments were first conducted on a large data set drawn from an operational English proficiency assessment across multiple years, and the best classifier on this heavily imbalanced data set resulted in an F1-score of 0.761 on the plagiarized class. This system was then validated on operational responses collected from a single administration of the assessment and achieved a recall of 0.897. The results indicate that the proposed system can potentially be used to improve the validity of both human and automated assessment of non-native spoken English.</abstract>
<identifier type="citekey">wang-etal-2019-application</identifier>
<identifier type="doi">10.18653/v1/W19-4445</identifier>
<location>
<url>https://aclanthology.org/W19-4445</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>435</start>
<end>443</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Application of an Automatic Plagiarism Detection System in a Large-scale Assessment of English Speaking Proficiency
%A Wang, Xinhao
%A Evanini, Keelan
%A Mulholland, Matthew
%A Qian, Yao
%A Bruno, James V.
%Y Yannakoudakis, Helen
%Y Kochmar, Ekaterina
%Y Leacock, Claudia
%Y Madnani, Nitin
%Y Pilán, Ildikó
%Y Zesch, Torsten
%S Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F wang-etal-2019-application
%X This study aims to build an automatic system for the detection of plagiarized spoken responses in the context of an assessment of English speaking proficiency for non-native speakers. Classification models were trained to distinguish between plagiarized and non-plagiarized responses with two different types of features: text-to-text content similarity measures, which are commonly used in the task of plagiarism detection for written documents, and speaking proficiency measures, which were specifically designed for spontaneous speech and extracted using an automated speech scoring system. The experiments were first conducted on a large data set drawn from an operational English proficiency assessment across multiple years, and the best classifier on this heavily imbalanced data set resulted in an F1-score of 0.761 on the plagiarized class. This system was then validated on operational responses collected from a single administration of the assessment and achieved a recall of 0.897. The results indicate that the proposed system can potentially be used to improve the validity of both human and automated assessment of non-native spoken English.
%R 10.18653/v1/W19-4445
%U https://aclanthology.org/W19-4445
%U https://doi.org/10.18653/v1/W19-4445
%P 435-443
Markdown (Informal)
[Application of an Automatic Plagiarism Detection System in a Large-scale Assessment of English Speaking Proficiency](https://aclanthology.org/W19-4445) (Wang et al., BEA 2019)
ACL