@inproceedings{hassani-2025-fine,
title = "Fine-tuning {XLM}-{R}o{BERT}a for Named Entity Recognition in {K}urmanji {K}urdish",
author = "Hassani, Hossein",
editor = "Zhang, Chen and
Allaway, Emily and
Shen, Hua and
Miculicich, Lesly and
Li, Yinqiao and
M'hamdi, Meryem and
Limkonchotiwat, Peerat and
Bai, Richard He and
T.y.s.s., Santosh and
Han, Sophia Simeng and
Thapa, Surendrabikram and
Rim, Wiem Ben",
booktitle = "Proceedings of the 9th Widening NLP Workshop",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.winlp-main.8/",
pages = "41--45",
ISBN = "979-8-89176-351-7",
abstract = "Named Entity Recognition (NER) is the information extraction task of identifying predefined named entities such as person names, location names, organization names and more. High-resource languages have made significant progress in NER tasks. However, low-resource languages such as Kurmanji Kurdish have not seen the same advancements, due to these languages having less available data online. This research aims to close this gap by developing an NER system via fine-tuning XLM-RoBERTa on a manually annotated dataset for Kurmanji. The dataset used for fine-tuning consists of 7,919 annotated sentences, which were manually annotated by three native Kurmanji speakers. The classes labeled in the dataset are Person (PER), Organization (ORG), and Location (LOC). A web-based application has also been developed using Streamlit to make the model more accessible. The model achieved an F1 score of 0.8735, precision of 0.8668, and recall of 0.8803, demonstrating the effectiveness of fine-tuning transformer-based models for NER tasks in low-resource languages. This work establishes a methodology that can be applied to other low-resource languages and Kurdish varieties."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hassani-2025-fine">
<titleInfo>
<title>Fine-tuning XLM-RoBERTa for Named Entity Recognition in Kurmanji Kurdish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hossein</namePart>
<namePart type="family">Hassani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Widening NLP Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Allaway</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hua</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lesly</namePart>
<namePart type="family">Miculicich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinqiao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meryem</namePart>
<namePart type="family">M’hamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peerat</namePart>
<namePart type="family">Limkonchotiwat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">He</namePart>
<namePart type="family">Bai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.y.s.s.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="given">Simeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Surendrabikram</namePart>
<namePart type="family">Thapa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wiem</namePart>
<namePart type="given">Ben</namePart>
<namePart type="family">Rim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-351-7</identifier>
</relatedItem>
<abstract>Named Entity Recognition (NER) is the information extraction task of identifying predefined named entities such as person names, location names, organization names and more. High-resource languages have made significant progress in NER tasks. However, low-resource languages such as Kurmanji Kurdish have not seen the same advancements, due to these languages having less available data online. This research aims to close this gap by developing an NER system via fine-tuning XLM-RoBERTa on a manually annotated dataset for Kurmanji. The dataset used for fine-tuning consists of 7,919 annotated sentences, which were manually annotated by three native Kurmanji speakers. The classes labeled in the dataset are Person (PER), Organization (ORG), and Location (LOC). A web-based application has also been developed using Streamlit to make the model more accessible. The model achieved an F1 score of 0.8735, precision of 0.8668, and recall of 0.8803, demonstrating the effectiveness of fine-tuning transformer-based models for NER tasks in low-resource languages. This work establishes a methodology that can be applied to other low-resource languages and Kurdish varieties.</abstract>
<identifier type="citekey">hassani-2025-fine</identifier>
<location>
<url>https://aclanthology.org/2025.winlp-main.8/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>41</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-tuning XLM-RoBERTa for Named Entity Recognition in Kurmanji Kurdish
%A Hassani, Hossein
%Y Zhang, Chen
%Y Allaway, Emily
%Y Shen, Hua
%Y Miculicich, Lesly
%Y Li, Yinqiao
%Y M’hamdi, Meryem
%Y Limkonchotiwat, Peerat
%Y Bai, Richard He
%Y T.y.s.s., Santosh
%Y Han, Sophia Simeng
%Y Thapa, Surendrabikram
%Y Rim, Wiem Ben
%S Proceedings of the 9th Widening NLP Workshop
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-351-7
%F hassani-2025-fine
%X Named Entity Recognition (NER) is the information extraction task of identifying predefined named entities such as person names, location names, organization names and more. High-resource languages have made significant progress in NER tasks. However, low-resource languages such as Kurmanji Kurdish have not seen the same advancements, due to these languages having less available data online. This research aims to close this gap by developing an NER system via fine-tuning XLM-RoBERTa on a manually annotated dataset for Kurmanji. The dataset used for fine-tuning consists of 7,919 annotated sentences, which were manually annotated by three native Kurmanji speakers. The classes labeled in the dataset are Person (PER), Organization (ORG), and Location (LOC). A web-based application has also been developed using Streamlit to make the model more accessible. The model achieved an F1 score of 0.8735, precision of 0.8668, and recall of 0.8803, demonstrating the effectiveness of fine-tuning transformer-based models for NER tasks in low-resource languages. This work establishes a methodology that can be applied to other low-resource languages and Kurdish varieties.
%U https://aclanthology.org/2025.winlp-main.8/
%P 41-45
Markdown (Informal)
[Fine-tuning XLM-RoBERTa for Named Entity Recognition in Kurmanji Kurdish](https://aclanthology.org/2025.winlp-main.8/) (Hassani, WiNLP 2025)
ACL