% Conference paper from the PolEval 2025 workshop proceedings (record 2025.poleval-main.13 on aclanthology.org).
@inproceedings{kuczynski-2025-zero,
  title     = {Zero-Shot Transfer of Pretrained Speech Representations for Multilingual Emotion Recognition},
  author    = {Kuczy{\'n}ski, Tomasz},
  editor    = {Kobyli{\'n}ski, {\L}ukasz and
               Wr{\'o}blewska, Alina and
               Ogrodniczuk, Maciej},
  booktitle = {Proceedings of the {PolEval} 2025 Workshop},
  month     = nov,
  year      = {2025},
  address   = {Warsaw},
  publisher = {Institute of Computer Science PAS and Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.poleval-main.13/},
  pages     = {91--96},
  abstract  = {Speech emotion recognition remains a challenging task, particularly in low-resource language settings. In this work, we explore the development of a system capable of identifying emotional states in Polish speech using training data exclusively from other languages. Our approach relies on a pretrained speech representation model and follows a strict zero-shot training paradigm, enabling cross-lingual knowledge transfer without access to any Polish data. The system was developed in the context of the Polish Speech Emotion Recognition Challenge (PolEval 2025), which required participants to train models solely on multilingual resources and evaluate them on Polish speech in a zero-shot setup. We present a complete solution encompassing model selection, audio preprocessing, and fine-tuning strategy, and discuss the potential of large-scale language models for cross-lingual emotion recognition.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for citekey kuczynski-2025-zero: one author, hosted in a proceedings volume with three editors. -->
  <mods ID="kuczynski-2025-zero">
    <titleInfo>
      <title>Zero-Shot Transfer of Pretrained Speech Representations for Multilingual Emotion Recognition</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Tomasz</namePart>
      <namePart type="family">Kuczyński</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the PolEval 2025 Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Łukasz</namePart>
        <namePart type="family">Kobyliński</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alina</namePart>
        <namePart type="family">Wróblewska</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maciej</namePart>
        <namePart type="family">Ogrodniczuk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Institute of Computer Science PAS and Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Warsaw</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Speech emotion recognition remains a challenging task, particularly in low-resource language settings. In this work, we explore the development of a system capable of identifying emotional states in Polish speech using training data exclusively from other languages. Our approach relies on a pretrained speech representation model and follows a strict zero-shot training paradigm, enabling cross-lingual knowledge transfer without access to any Polish data. The system was developed in the context of the Polish Speech Emotion Recognition Challenge (PolEval 2025), which required participants to train models solely on multilingual resources and evaluate them on Polish speech in a zero-shot setup. We present a complete solution encompassing model selection, audio preprocessing, and fine-tuning strategy, and discuss the potential of large-scale language models for cross-lingual emotion recognition.</abstract>
    <identifier type="citekey">kuczynski-2025-zero</identifier>
    <location>
      <url>https://aclanthology.org/2025.poleval-main.13/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>91</start>
        <end>96</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Zero-Shot Transfer of Pretrained Speech Representations for Multilingual Emotion Recognition
%A Kuczyński, Tomasz
%Y Kobyliński, Łukasz
%Y Wróblewska, Alina
%Y Ogrodniczuk, Maciej
%S Proceedings of the PolEval 2025 Workshop
%D 2025
%8 November
%I Institute of Computer Science PAS and Association for Computational Linguistics
%C Warsaw
%F kuczynski-2025-zero
%X Speech emotion recognition remains a challenging task, particularly in low-resource language settings. In this work, we explore the development of a system capable of identifying emotional states in Polish speech using training data exclusively from other languages. Our approach relies on a pretrained speech representation model and follows a strict zero-shot training paradigm, enabling cross-lingual knowledge transfer without access to any Polish data. The system was developed in the context of the Polish Speech Emotion Recognition Challenge (PolEval 2025), which required participants to train models solely on multilingual resources and evaluate them on Polish speech in a zero-shot setup. We present a complete solution encompassing model selection, audio preprocessing, and fine-tuning strategy, and discuss the potential of large-scale language models for cross-lingual emotion recognition.
%U https://aclanthology.org/2025.poleval-main.13/
%P 91-96
Markdown (Informal)
[Zero-Shot Transfer of Pretrained Speech Representations for Multilingual Emotion Recognition](https://aclanthology.org/2025.poleval-main.13/) (Kuczyński, PolEval 2025)
ACL