@inproceedings{zhao-etal-2024-utilizing,
title = "Utilizing an Ensemble Model with Anomalous Label Smoothing to Detect Generated Scientific Papers",
author = "Zhao, Yuan and
Gao, Junruo and
Wang, Junlin and
Luo, Gang and
Tang, Liang",
editor = "Ghosal, Tirthankar and
Singh, Amanpreet and
Waard, Anita and
Mayr, Philipp and
Naik, Aakanksha and
Weller, Orion and
Lee, Yoonjoo and
Shen, Shannon and
Qin, Yanxia",
booktitle = "Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.sdp-1.12",
pages = "130--134",
abstract = "Generative AI, as it becomes increasingly integrated into our lives, has brought convenience, though some concerns have arisen regarding its potential impact on the rigor and authenticity of scientific research. To encourage the development of robust and reliable automatically-generated scientific text detection systems, the {``}DAGPap24: Detecting Automatically Generated Scientific Papers{''} competition was held and shared the same task with the 4th Workshop on Scholarly Document Processing (SDP 2024) to be held at ACL 2024. In the DAGPap24 competition, participants were tasked with constructing a generative text detection model that could accurately distinguish between the human written fragment, the synonym replacement fragment, the ChatGPT rewrite fragment, and the generated summary fragment of a paper. In this competition, we first conducted a comprehensive analysis of the training set to build a generative paper detection model. Then we tried various language models, including SciBERT, ALBERT, DeBERTa, RoBERTa, etc. After that, we introduced an Anomalous Label Smoothing (ALS) method and a majority voting method to improve the final results. Finally, we achieved 0.9948 and 0.9944 F1 scores during the development and testing phases respectively, and we achieved second place in the competition.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2024-utilizing">
<titleInfo>
<title>Utilizing an Ensemble Model with Anomalous Label Smoothing to Detect Generated Scientific Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junruo</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junlin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gang</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liang</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tirthankar</namePart>
<namePart type="family">Ghosal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanpreet</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anita</namePart>
<namePart type="family">Waard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mayr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakanksha</namePart>
<namePart type="family">Naik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orion</namePart>
<namePart type="family">Weller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yoonjoo</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanxia</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generative AI, as it becomes increasingly integrated into our lives, has brought convenience, though some concerns have arisen regarding its potential impact on the rigor and authenticity of scientific research. To encourage the development of robust and reliable automatically-generated scientific text detection systems, the “DAGPap24: Detecting Automatically Generated Scientific Papers” competition was held and shared the same task with the 4th Workshop on Scholarly Document Processing (SDP 2024) to be held at ACL 2024. In the DAGPap24 competition, participants were tasked with constructing a generative text detection model that could accurately distinguish between the human written fragment, the synonym replacement fragment, the ChatGPT rewrite fragment, and the generated summary fragment of a paper. In this competition, we first conducted a comprehensive analysis of the training set to build a generative paper detection model. Then we tried various language models, including SciBERT, ALBERT, DeBERTa, RoBERTa, etc. After that, we introduced an Anomalous Label Smoothing (ALS) method and a majority voting method to improve the final results. Finally, we achieved 0.9948 and 0.9944 F1 scores during the development and testing phases respectively, and we achieved second place in the competition.</abstract>
<identifier type="citekey">zhao-etal-2024-utilizing</identifier>
<location>
<url>https://aclanthology.org/2024.sdp-1.12</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>130</start>
<end>134</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Utilizing an Ensemble Model with Anomalous Label Smoothing to Detect Generated Scientific Papers
%A Zhao, Yuan
%A Gao, Junruo
%A Wang, Junlin
%A Luo, Gang
%A Tang, Liang
%Y Ghosal, Tirthankar
%Y Singh, Amanpreet
%Y Waard, Anita
%Y Mayr, Philipp
%Y Naik, Aakanksha
%Y Weller, Orion
%Y Lee, Yoonjoo
%Y Shen, Shannon
%Y Qin, Yanxia
%S Proceedings of the Fourth Workshop on Scholarly Document Processing (SDP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F zhao-etal-2024-utilizing
%X Generative AI, as it becomes increasingly integrated into our lives, has brought convenience, though some concerns have arisen regarding its potential impact on the rigor and authenticity of scientific research. To encourage the development of robust and reliable automatically-generated scientific text detection systems, the “DAGPap24: Detecting Automatically Generated Scientific Papers” competition was held and shared the same task with the 4th Workshop on Scholarly Document Processing (SDP 2024) to be held at ACL 2024. In the DAGPap24 competition, participants were tasked with constructing a generative text detection model that could accurately distinguish between the human written fragment, the synonym replacement fragment, the ChatGPT rewrite fragment, and the generated summary fragment of a paper. In this competition, we first conducted a comprehensive analysis of the training set to build a generative paper detection model. Then we tried various language models, including SciBERT, ALBERT, DeBERTa, RoBERTa, etc. After that, we introduced an Anomalous Label Smoothing (ALS) method and a majority voting method to improve the final results. Finally, we achieved 0.9948 and 0.9944 F1 scores during the development and testing phases respectively, and we achieved second place in the competition.
%U https://aclanthology.org/2024.sdp-1.12
%P 130-134
Markdown (Informal)
[Utilizing an Ensemble Model with Anomalous Label Smoothing to Detect Generated Scientific Papers](https://aclanthology.org/2024.sdp-1.12) (Zhao et al., sdp-WS 2024)
ACL