% BibTeX record for the PolEval 2025 Task 1 shared-task overview paper.
% Words in the title that must keep their capitalisation under sentence-casing
% styles are protected as whole words; the accented initial of the dataset name
% is double-braced so it is not treated as a case-convertible special character.
@inproceedings{przybyla-etal-2025-poleval,
    title     = {{PolEval} 2025 Task 1 {{\'S}migiel}: Spotting Machine-Generated Text from {LLMs} for {Polish}},
    author    = {Przyby{\l}a, Piotr and
                 Strebeyko, Jakub and
                 Wr{\'o}blewska, Alina},
    editor    = {Kobyli{\'n}ski, {\L}ukasz and
                 Wr{\'o}blewska, Alina and
                 Ogrodniczuk, Maciej},
    booktitle = {Proceedings of the {PolEval} 2025 Workshop},
    month     = nov,
    year      = {2025},
    address   = {Warsaw},
    publisher = {Institute of Computer Science PAS and Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.poleval-main.2/},
    pages     = {5--15},
    abstract  = {This paper introduces the first shared task on machine-generated text (MGT) detection for Polish, organised as part of the PolEval 2025 evaluation campaign. The task evaluates participating systems under three scenarios {---} unsupervised, constrained, and open {---} designed to reflect different levels of access to training data. In total, seven systems were submitted. The results indicate that MGT detection for Polish is feasible, with the best-performing constrained systems achieving over 90{\%} accuracy on the main evaluation set. However, performance drops when models are tested on unseen domains or generator models, revealing substantial limitations in generalisation. In the most challenging settings, unsupervised approaches perform better, despite achieving overall lower performance. This shared task establishes a new benchmark for MGT detection in Polish. The publicly released {\'S}migiel dataset is intended to support future research on robust and generalisable MGT detection methods.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for a single paper. The top-level name elements are the
       paper's authors; the host relatedItem describes the proceedings volume
       (its title, editors, publisher and place of publication). -->
  <mods ID="przybyla-etal-2025-poleval">
    <titleInfo>
      <title>PolEval 2025 Task 1 Śmigiel: Spotting Machine-Generated Text from LLMs for Polish</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Piotr</namePart>
      <namePart type="family">Przybyła</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jakub</namePart>
      <namePart type="family">Strebeyko</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alina</namePart>
      <namePart type="family">Wróblewska</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings in which the paper appears. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the PolEval 2025 Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Łukasz</namePart>
        <namePart type="family">Kobyliński</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alina</namePart>
        <namePart type="family">Wróblewska</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maciej</namePart>
        <namePart type="family">Ogrodniczuk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Institute of Computer Science PAS and Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Warsaw</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper introduces the first shared task on machine-generated text (MGT) detection for Polish, organised as part of the PolEval 2025 evaluation campaign. The task evaluates participating systems under three scenarios — unsupervised, constrained, and open — designed to reflect different levels of access to training data. In total, seven systems were submitted. The results indicate that MGT detection for Polish is feasible, with the best-performing constrained systems achieving over 90% accuracy on the main evaluation set. However, performance drops when models are tested on unseen domains or generator models, revealing substantial limitations in generalisation. In the most challenging settings, unsupervised approaches perform better, despite achieving overall lower performance. This shared task establishes a new benchmark for MGT detection in Polish. The publicly released Śmigiel dataset is intended to support future research on robust and generalisable MGT detection methods.</abstract>
    <identifier type="citekey">przybyla-etal-2025-poleval</identifier>
    <location>
      <url>https://aclanthology.org/2025.poleval-main.2/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>5</start>
        <end>15</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T PolEval 2025 Task 1 Śmigiel: Spotting Machine-Generated Text from LLMs for Polish
%A Przybyła, Piotr
%A Strebeyko, Jakub
%A Wróblewska, Alina
%Y Kobyliński, Łukasz
%Y Wróblewska, Alina
%Y Ogrodniczuk, Maciej
%S Proceedings of the PolEval 2025 Workshop
%D 2025
%8 November
%I Institute of Computer Science PAS and Association for Computational Linguistics
%C Warsaw
%F przybyla-etal-2025-poleval
%X This paper introduces the first shared task on machine-generated text (MGT) detection for Polish, organised as part of the PolEval 2025 evaluation campaign. The task evaluates participating systems under three scenarios — unsupervised, constrained, and open — designed to reflect different levels of access to training data. In total, seven systems were submitted. The results indicate that MGT detection for Polish is feasible, with the best-performing constrained systems achieving over 90% accuracy on the main evaluation set. However, performance drops when models are tested on unseen domains or generator models, revealing substantial limitations in generalisation. In the most challenging settings, unsupervised approaches perform better, despite achieving overall lower performance. This shared task establishes a new benchmark for MGT detection in Polish. The publicly released Śmigiel dataset is intended to support future research on robust and generalisable MGT detection methods.
%U https://aclanthology.org/2025.poleval-main.2/
%P 5-15
Markdown (Informal)
[PolEval 2025 Task 1 Śmigiel: Spotting Machine-Generated Text from LLMs for Polish](https://aclanthology.org/2025.poleval-main.2/) (Przybyła et al., PolEval 2025)
ACL