@inproceedings{chamezopoulos-etal-2024-overview,
title = "Overview of the {D}ag{P}ap24 Shared Task on Detecting Automatically Generated Scientific Paper",
author = "Chamezopoulos, Savvas and
Herrmannova, Drahomira and
De Waard, Anita and
Herrmannova, Drahomira and
Rosati, Domenic and
Kashnitsky, Yury",
editor = "Ghosal, Tirthankar and
Singh, Amanpreet and
Waard, Anita and
Mayr, Philipp and
Naik, Aakanksha and
Weller, Orion and
Lee, Yoonjoo and
Shen, Shannon and
Qin, Yanxia",
booktitle = "Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sdp-1.2",
pages = "7--11",
abstract = "This paper provides an overview of the 2024 ACL Scholarly Document Processing workshop shared task on the detection of automatically generated scientific papers. Unlike our previous task, which focused on the binary classification of whether scientific passages were machine-generated or not, one likely use case for text generation technology in scientific writing is to intersperse human-written text with passages of machine-generated text. We frame the detection problem as a multiclass span classification task: given an expert of text, label token spans in the text as human-written or machine-generated We shared a dataset containing excerpts from human-written papers as well as artificially generated content collected by Elsevier publishing and editorial teams. As a test set, the participants were provided with a corpus of openly accessible human-written as well as generated papers from the same scientific domains of documents. The shared task saw 457 submissions across 28 participating teams and resulted in three published technical reports. We discuss our findings from the shared task in this overview paper.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chamezopoulos-etal-2024-overview">
<titleInfo>
<title>Overview of the DagPap24 Shared Task on Detecting Automatically Generated Scientific Paper</title>
</titleInfo>
<name type="personal">
<namePart type="given">Savvas</namePart>
<namePart type="family">Chamezopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Drahomira</namePart>
<namePart type="family">Herrmannova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">De Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Domenic</namePart>
<namePart type="family">Rosati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yury</namePart>
<namePart type="family">Kashnitsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoonjoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper provides an overview of the 2024 ACL Scholarly Document Processing workshop shared task on the detection of automatically generated scientific papers. Unlike our previous task, which focused on the binary classification of whether scientific passages were machine-generated or not, one likely use case for text generation technology in scientific writing is to intersperse human-written text with passages of machine-generated text. We frame the detection problem as a multiclass span classification task: given an expert of text, label token spans in the text as human-written or machine-generated We shared a dataset containing excerpts from human-written papers as well as artificially generated content collected by Elsevier publishing and editorial teams. As a test set, the participants were provided with a corpus of openly accessible human-written as well as generated papers from the same scientific domains of documents. The shared task saw 457 submissions across 28 participating teams and resulted in three published technical reports. We discuss our findings from the shared task in this overview paper.</abstract>
<identifier type="citekey">chamezopoulos-etal-2024-overview</identifier>
<location>
<url>https://aclanthology.org/2024.sdp-1.2</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>7</start>
<end>11</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Overview of the DagPap24 Shared Task on Detecting Automatically Generated Scientific Paper
%A Chamezopoulos, Savvas
%A Herrmannova, Drahomira
%A De Waard, Anita
%A Rosati, Domenic
%A Kashnitsky, Yury
%Y Ghosal, Tirthankar
%Y Singh, Amanpreet
%Y Waard, Anita
%Y Mayr, Philipp
%Y Naik, Aakanksha
%Y Weller, Orion
%Y Lee, Yoonjoo
%Y Shen, Shannon
%Y Qin, Yanxia
%S Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F chamezopoulos-etal-2024-overview
%X This paper provides an overview of the 2024 ACL Scholarly Document Processing workshop shared task on the detection of automatically generated scientific papers. Unlike our previous task, which focused on the binary classification of whether scientific passages were machine-generated or not, one likely use case for text generation technology in scientific writing is to intersperse human-written text with passages of machine-generated text. We frame the detection problem as a multiclass span classification task: given an expert of text, label token spans in the text as human-written or machine-generated We shared a dataset containing excerpts from human-written papers as well as artificially generated content collected by Elsevier publishing and editorial teams. As a test set, the participants were provided with a corpus of openly accessible human-written as well as generated papers from the same scientific domains of documents. The shared task saw 457 submissions across 28 participating teams and resulted in three published technical reports. We discuss our findings from the shared task in this overview paper.
%U https://aclanthology.org/2024.sdp-1.2
%P 7-11
Markdown (Informal)
[Overview of the DagPap24 Shared Task on Detecting Automatically Generated Scientific Paper](https://aclanthology.org/2024.sdp-1.2) (Chamezopoulos et al., sdp-WS 2024)
ACL