@inproceedings{mohammadamini-etal-2026-fleurs,
title = "Fleurs-Badini: Translation and Recording Fleurs Dataset for Badini Variant of {N}orthern {K}urdish",
author = "Mohammadamini, Mohammad and
Tayib, Dilgash Mohammed Salih and
Abdulazeez, Dezheen H. and
Mohammed, Barzan Hussein and
Sadeeq, Imad Saeed and
Mohammed, Aveen Jalal and
Melhum, Amera Ismail and
Dheyab, Abuobaida Abdullah",
editor = "Salesky, Elizabeth and
Anastasopoulos, Antonios and
Negri, Matteo and
Federico, Marcello",
booktitle = "Proceedings of the 23rd International Conference on Spoken Language Translation ({IWSLT} 2026)",
month = jul,
year = "2026",
address = "San Diego, USA (in-person and online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.iwslt-1.14/",
pages = "119--123",
ISBN = "979-8-89176-411-8",
abstract = "Multilingual speech benchmarks such as the FLEURS benchmark have significantly advanced research across a wide range of languages. However, important dialects, including Badini Kurdish, remain underrepresented, limiting benchmarking in automatic speech recognition (ASR) and speech-to-text translation (S2TT). To address this limitation, this study introduces FLEURS-Badini, a dialect-focused extension designed to support research on Northern Kurdish (Badini). The dataset is constructed through a structured process of translation, recording, and validation, resulting in 5,224 utterances paired with their corresponding translated text. The data were collected from 45 speakers. To evaluate the dataset, baseline experiments are conducted using state-of-the-art models for both ASR and S2TT. The results indicate that ASR remains challenging, with the best performance achieved by the W2V-BERT CTC model, reaching a Word Error Rate (WER) of approximately 55{\%} on the test set. Similarly, speech-to-text translation performance is limited, with BLEU scores 6.13 and 5.24 on dev and test sets. Overall, FLEURS-Badini expands multilingual coverage and provides a standardized foundation for evaluating ASR and speech translation systems in the Badini dialect."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mohammadamini-etal-2026-fleurs">
<titleInfo>
<title>Fleurs-Badini: Translation and Recording Fleurs Dataset for Badini Variant of Northern Kurdish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="family">Mohammadamini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilgash</namePart>
<namePart type="given">Mohammed</namePart>
<namePart type="given">Salih</namePart>
<namePart type="family">Tayib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dezheen</namePart>
<namePart type="given">H</namePart>
<namePart type="family">Abdulazeez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barzan</namePart>
<namePart type="given">Hussein</namePart>
<namePart type="family">Mohammed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imad</namePart>
<namePart type="given">Saeed</namePart>
<namePart type="family">Sadeeq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aveen</namePart>
<namePart type="given">Jalal</namePart>
<namePart type="family">Mohammed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amera</namePart>
<namePart type="given">Ismail</namePart>
<namePart type="family">Melhum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abuobaida</namePart>
<namePart type="given">Abdullah</namePart>
<namePart type="family">Dheyab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Salesky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, USA (in-person and online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-411-8</identifier>
</relatedItem>
<abstract>Multilingual speech benchmarks such as the FLEURS benchmark have significantly advanced research across a wide range of languages. However, important dialects, including Badini Kurdish, remain underrepresented, limiting benchmarking in automatic speech recognition (ASR) and speech-to-text translation (S2TT). To address this limitation, this study introduces FLEURS-Badini, a dialect-focused extension designed to support research on Northern Kurdish (Badini). The dataset is constructed through a structured process of translation, recording, and validation, resulting in 5,224 utterances paired with their corresponding translated text. The data were collected from 45 speakers. To evaluate the dataset, baseline experiments are conducted using state-of-the-art models for both ASR and S2TT. The results indicate that ASR remains challenging, with the best performance achieved by the W2V-BERT CTC model, reaching a Word Error Rate (WER) of approximately 55% on the test set. Similarly, speech-to-text translation performance is limited, with BLEU scores 6.13 and 5.24 on dev and test sets. Overall, FLEURS-Badini expands multilingual coverage and provides a standardized foundation for evaluating ASR and speech translation systems in the Badini dialect.</abstract>
<identifier type="citekey">mohammadamini-etal-2026-fleurs</identifier>
<location>
<url>https://aclanthology.org/2026.iwslt-1.14/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>119</start>
<end>123</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fleurs-Badini: Translation and Recording Fleurs Dataset for Badini Variant of Northern Kurdish
%A Mohammadamini, Mohammad
%A Tayib, Dilgash Mohammed Salih
%A Abdulazeez, Dezheen H.
%A Mohammed, Barzan Hussein
%A Sadeeq, Imad Saeed
%A Mohammed, Aveen Jalal
%A Melhum, Amera Ismail
%A Dheyab, Abuobaida Abdullah
%Y Salesky, Elizabeth
%Y Anastasopoulos, Antonios
%Y Negri, Matteo
%Y Federico, Marcello
%S Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA (in-person and online)
%@ 979-8-89176-411-8
%F mohammadamini-etal-2026-fleurs
%X Multilingual speech benchmarks such as the FLEURS benchmark have significantly advanced research across a wide range of languages. However, important dialects, including Badini Kurdish, remain underrepresented, limiting benchmarking in automatic speech recognition (ASR) and speech-to-text translation (S2TT). To address this limitation, this study introduces FLEURS-Badini, a dialect-focused extension designed to support research on Northern Kurdish (Badini). The dataset is constructed through a structured process of translation, recording, and validation, resulting in 5,224 utterances paired with their corresponding translated text. The data were collected from 45 speakers. To evaluate the dataset, baseline experiments are conducted using state-of-the-art models for both ASR and S2TT. The results indicate that ASR remains challenging, with the best performance achieved by the W2V-BERT CTC model, reaching a Word Error Rate (WER) of approximately 55% on the test set. Similarly, speech-to-text translation performance is limited, with BLEU scores 6.13 and 5.24 on dev and test sets. Overall, FLEURS-Badini expands multilingual coverage and provides a standardized foundation for evaluating ASR and speech translation systems in the Badini dialect.
%U https://aclanthology.org/2026.iwslt-1.14/
%P 119-123
Markdown (Informal)
[Fleurs-Badini: Translation and Recording Fleurs Dataset for Badini Variant of Northern Kurdish](https://aclanthology.org/2026.iwslt-1.14/) (Mohammadamini et al., IWSLT 2026)
ACL
- Mohammad Mohammadamini, Dilgash Mohammed Salih Tayib, Dezheen H. Abdulazeez, Barzan Hussein Mohammed, Imad Saeed Sadeeq, Aveen Jalal Mohammed, Amera Ismail Melhum, and Abuobaida Abdullah Dheyab. 2026. Fleurs-Badini: Translation and Recording Fleurs Dataset for Badini Variant of Northern Kurdish. In Proceedings of the 23rd International Conference on Spoken Language Translation (IWSLT 2026), pages 119–123, San Diego, USA (in-person and online). Association for Computational Linguistics.