@inproceedings{wrobel-2025-unsupervised,
title = "Unsupervised Detection of {LLM}-Generated {P}olish Text Using Perplexity Difference",
author = "Wr{\'o}bel, Krzysztof",
editor = "Kobyli{\'n}ski, {\L}ukasz and
Wr{\'o}blewska, Alina and
Ogrodniczuk, Maciej",
booktitle = "Proceedings of the {P}ol{E}val 2025 Workshop",
month = nov,
year = "2025",
address = "Warsaw",
publisher = "Institute of Computer Science PAS and Association for Computational Linguistics",
url = "https://aclanthology.org/2025.poleval-main.5/",
pages = "26--38",
abstract = "Inspired by zero-shot detection methods that compare perplexity across model pairs, we investigate whether computing perplexity differences on whole-text character-level perplexity can effectively detect LLM-generated Polish text. Unlike token-level ratio methods that require compatible tokenizers, our approach enables pairing any models regardless of tokenization. Through systematic evaluation of 91 model pairs on the PolEval 2025 {\'S}MIGIEL shared task, we identify Gemma-3-27B and PLLuM-12B as optimal, achieving 81.22{\%} accuracy on test data with unseen generators. Our difference-based approach outperforms token-level ratio methods (+5.5pp) and single-model baselines (+8.3pp) without using training labels, capturing asymmetric reactions where human text causes greater perplexity divergence than LLM text. We demonstrate that complementary model pairing (multilingual + monolingual) and architectural quality matter more than raw model size for this task."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wrobel-2025-unsupervised">
<titleInfo>
<title>Unsupervised Detection of LLM-Generated Polish Text Using Perplexity Difference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Wróbel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the PolEval 2025 Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Łukasz</namePart>
<namePart type="family">Kobyliński</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alina</namePart>
<namePart type="family">Wróblewska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Institute of Computer Science PAS and Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Warsaw</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Inspired by zero-shot detection methods that compare perplexity across model pairs, we investigate whether computing perplexity differences on whole-text character-level perplexity can effectively detect LLM-generated Polish text. Unlike token-level ratio methods that require compatible tokenizers, our approach enables pairing any models regardless of tokenization. Through systematic evaluation of 91 model pairs on the PolEval 2025 ŚMIGIEL shared task, we identify Gemma-3-27B and PLLuM-12B as optimal, achieving 81.22% accuracy on test data with unseen generators. Our difference-based approach outperforms token-level ratio methods (+5.5pp) and single-model baselines (+8.3pp) without using training labels, capturing asymmetric reactions where human text causes greater perplexity divergence than LLM text. We demonstrate that complementary model pairing (multilingual + monolingual) and architectural quality matter more than raw model size for this task.</abstract>
<identifier type="citekey">wrobel-2025-unsupervised</identifier>
<location>
<url>https://aclanthology.org/2025.poleval-main.5/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>26</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Detection of LLM-Generated Polish Text Using Perplexity Difference
%A Wróbel, Krzysztof
%Y Kobyliński, Łukasz
%Y Wróblewska, Alina
%Y Ogrodniczuk, Maciej
%S Proceedings of the PolEval 2025 Workshop
%D 2025
%8 November
%I Institute of Computer Science PAS and Association for Computational Linguistics
%C Warsaw
%F wrobel-2025-unsupervised
%X Inspired by zero-shot detection methods that compare perplexity across model pairs, we investigate whether computing perplexity differences on whole-text character-level perplexity can effectively detect LLM-generated Polish text. Unlike token-level ratio methods that require compatible tokenizers, our approach enables pairing any models regardless of tokenization. Through systematic evaluation of 91 model pairs on the PolEval 2025 ŚMIGIEL shared task, we identify Gemma-3-27B and PLLuM-12B as optimal, achieving 81.22% accuracy on test data with unseen generators. Our difference-based approach outperforms token-level ratio methods (+5.5pp) and single-model baselines (+8.3pp) without using training labels, capturing asymmetric reactions where human text causes greater perplexity divergence than LLM text. We demonstrate that complementary model pairing (multilingual + monolingual) and architectural quality matter more than raw model size for this task.
%U https://aclanthology.org/2025.poleval-main.5/
%P 26-38
Markdown (Informal)
[Unsupervised Detection of LLM-Generated Polish Text Using Perplexity Difference](https://aclanthology.org/2025.poleval-main.5/) (Wróbel, PolEval 2025)
ACL