@inproceedings{ciobanu-etal-2026-xmad,
title = "{XMAD}-Bench: Cross-Domain Multilingual Audio Deepfake Benchmark",
author = "Ciobanu, Ioan-Paul and
H{\^i}ji, Andrei-Iulian and
Ristea, Nicolae Catalin and
Irofti, Paul and
Rusu, Cristian and
Ionescu, Radu Tudor",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.162/",
pages = "3109--3120",
ISBN = "979-8-89176-386-9",
abstract = "Recent advances in audio generation led to an increasing number of deepfakes, making the general public more vulnerable to financial scams, identity theft, and misinformation. Audio deepfake detectors promise to alleviate this issue, with many recent studies reporting accuracy rates close to 99{\%}. However, these methods are typically tested in an in-domain setup, where the deepfake samples from the training and test sets are produced by the same generative models. To this end, we introduce XMAD-Bench, a large-scale cross-domain multilingual audio deepfake benchmark comprising 668.8 hours of real and deepfake speech. In our novel dataset, the speakers, the generative methods, and the real audio sources are distinct across training and test splits. This leads to a challenging cross-domain evaluation setup, where audio deepfake detectors can be tested ``in the wild''. Our in-domain and cross-domain experiments indicate a clear disparity between the in-domain performance of deepfake detectors, which is usually as high as 100{\%}, and the cross-domain performance of the same models, which is sometimes similar to random chance. Our benchmark highlights the need for the development of robust audio deepfake detectors, which maintain their generalization capacity across different languages, speakers, generative methods, and data sources. Our benchmark is publicly released at https://github.com/ristea/xmad-bench/."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ciobanu-etal-2026-xmad">
<titleInfo>
<title>XMAD-Bench: Cross-Domain Multilingual Audio Deepfake Benchmark</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ioan-Paul</namePart>
<namePart type="family">Ciobanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei-Iulian</namePart>
<namePart type="family">Hîji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolae</namePart>
<namePart type="given">Catalin</namePart>
<namePart type="family">Ristea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Irofti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristian</namePart>
<namePart type="family">Rusu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radu</namePart>
<namePart type="given">Tudor</namePart>
<namePart type="family">Ionescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Recent advances in audio generation led to an increasing number of deepfakes, making the general public more vulnerable to financial scams, identity theft, and misinformation. Audio deepfake detectors promise to alleviate this issue, with many recent studies reporting accuracy rates close to 99%. However, these methods are typically tested in an in-domain setup, where the deepfake samples from the training and test sets are produced by the same generative models. To this end, we introduce XMAD-Bench, a large-scale cross-domain multilingual audio deepfake benchmark comprising 668.8 hours of real and deepfake speech. In our novel dataset, the speakers, the generative methods, and the real audio sources are distinct across training and test splits. This leads to a challenging cross-domain evaluation setup, where audio deepfake detectors can be tested “in the wild”. Our in-domain and cross-domain experiments indicate a clear disparity between the in-domain performance of deepfake detectors, which is usually as high as 100%, and the cross-domain performance of the same models, which is sometimes similar to random chance. Our benchmark highlights the need for the development of robust audio deepfake detectors, which maintain their generalization capacity across different languages, speakers, generative methods, and data sources. Our benchmark is publicly released at https://github.com/ristea/xmad-bench/.</abstract>
<identifier type="citekey">ciobanu-etal-2026-xmad</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.162/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>3109</start>
<end>3120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T XMAD-Bench: Cross-Domain Multilingual Audio Deepfake Benchmark
%A Ciobanu, Ioan-Paul
%A Hîji, Andrei-Iulian
%A Ristea, Nicolae Catalin
%A Irofti, Paul
%A Rusu, Cristian
%A Ionescu, Radu Tudor
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F ciobanu-etal-2026-xmad
%X Recent advances in audio generation led to an increasing number of deepfakes, making the general public more vulnerable to financial scams, identity theft, and misinformation. Audio deepfake detectors promise to alleviate this issue, with many recent studies reporting accuracy rates close to 99%. However, these methods are typically tested in an in-domain setup, where the deepfake samples from the training and test sets are produced by the same generative models. To this end, we introduce XMAD-Bench, a large-scale cross-domain multilingual audio deepfake benchmark comprising 668.8 hours of real and deepfake speech. In our novel dataset, the speakers, the generative methods, and the real audio sources are distinct across training and test splits. This leads to a challenging cross-domain evaluation setup, where audio deepfake detectors can be tested “in the wild”. Our in-domain and cross-domain experiments indicate a clear disparity between the in-domain performance of deepfake detectors, which is usually as high as 100%, and the cross-domain performance of the same models, which is sometimes similar to random chance. Our benchmark highlights the need for the development of robust audio deepfake detectors, which maintain their generalization capacity across different languages, speakers, generative methods, and data sources. Our benchmark is publicly released at https://github.com/ristea/xmad-bench/.
%U https://aclanthology.org/2026.findings-eacl.162/
%P 3109-3120
Markdown (Informal)
[XMAD-Bench: Cross-Domain Multilingual Audio Deepfake Benchmark](https://aclanthology.org/2026.findings-eacl.162/) (Ciobanu et al., Findings 2026)
ACL
- Ioan-Paul Ciobanu, Andrei-Iulian Hîji, Nicolae Catalin Ristea, Paul Irofti, Cristian Rusu, and Radu Tudor Ionescu. 2026. XMAD-Bench: Cross-Domain Multilingual Audio Deepfake Benchmark. In Findings of the Association for Computational Linguistics: EACL 2026, pages 3109–3120, Rabat, Morocco. Association for Computational Linguistics.