@inproceedings{adkins-etal-2025-named,
title = "Named Entity Recognition for the {I}rish Language",
author = "Adkins, Jane and
Collins, Hugo and
Wagner, Joachim and
Walsh, Abigail and
Davis, Brian",
editor = {Ojha, Atul Kr. and
Giouli, Voula and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Korvel, Gra{\v{z}}ina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico, U.S.A.",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.mwe-1.9/",
doi = "10.18653/v1/2025.mwe-1.9",
pages = "82--96",
ISBN = "979-8-89176-243-5",
abstract = "The Irish language has been deemed `definitely endangered' (Moseley, 2012) and has been classified as having `weak or no support' (Lynn, 2023) regarding digital resources in spite of its status as the first official and national language of the Republic of Ireland. This research develops the first named entity recognition (NER) tool for the Irish language, one of the essential tasks identified by the Digital Plan for Irish (N{\'i} Chasaide et al., 2022). In this study, we produce a small gold-standard NER-annotated corpus and compare both monolingual and multilingual BERT models fine-tuned on this task. We experiment with different model architectures and low-resource language approaches to enrich our dataset. We test our models on a mix of single- and multi-word named entities as well as a specific multi-word named entity test set. Our proposed gaBERT model with the implementation of random data augmentation and a conditional random fields layer demonstrates significant performance improvements over baseline models, alternative architectures, and multilingual models, achieving an F1 score of 76.52. This study contributes to advancing Irish language technologies and supporting Irish language digital resources, providing a basis for Irish NER and identification of other MWE types."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="adkins-etal-2025-named">
<titleInfo>
<title>Named Entity Recognition for the Irish Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jane</namePart>
<namePart type="family">Adkins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Collins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joachim</namePart>
<namePart type="family">Wagner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abigail</namePart>
<namePart type="family">Walsh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Voula</namePart>
<namePart type="family">Giouli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gražina</namePart>
<namePart type="family">Korvel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico, U.S.A.</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-243-5</identifier>
</relatedItem>
<abstract>The Irish language has been deemed ‘definitely endangered’ (Moseley, 2012) and has been classified as having ‘weak or no support’ (Lynn, 2023) regarding digital resources in spite of its status as the first official and national language of the Republic of Ireland. This research develops the first named entity recognition (NER) tool for the Irish language, one of the essential tasks identified by the Digital Plan for Irish (Ní Chasaide et al., 2022). In this study, we produce a small gold-standard NER-annotated corpus and compare both monolingual and multilingual BERT models fine-tuned on this task. We experiment with different model architectures and low-resource language approaches to enrich our dataset. We test our models on a mix of single- and multi-word named entities as well as a specific multi-word named entity test set. Our proposed gaBERT model with the implementation of random data augmentation and a conditional random fields layer demonstrates significant performance improvements over baseline models, alternative architectures, and multilingual models, achieving an F1 score of 76.52. This study contributes to advancing Irish language technologies and supporting Irish language digital resources, providing a basis for Irish NER and identification of other MWE types.</abstract>
<identifier type="citekey">adkins-etal-2025-named</identifier>
<identifier type="doi">10.18653/v1/2025.mwe-1.9</identifier>
<location>
<url>https://aclanthology.org/2025.mwe-1.9/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>82</start>
<end>96</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Named Entity Recognition for the Irish Language
%A Adkins, Jane
%A Collins, Hugo
%A Wagner, Joachim
%A Walsh, Abigail
%A Davis, Brian
%Y Ojha, Atul Kr.
%Y Giouli, Voula
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Korvel, Gražina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, U.S.A.
%@ 979-8-89176-243-5
%F adkins-etal-2025-named
%X The Irish language has been deemed ‘definitely endangered’ (Moseley, 2012) and has been classified as having ‘weak or no support’ (Lynn, 2023) regarding digital resources in spite of its status as the first official and national language of the Republic of Ireland. This research develops the first named entity recognition (NER) tool for the Irish language, one of the essential tasks identified by the Digital Plan for Irish (Ní Chasaide et al., 2022). In this study, we produce a small gold-standard NER-annotated corpus and compare both monolingual and multilingual BERT models fine-tuned on this task. We experiment with different model architectures and low-resource language approaches to enrich our dataset. We test our models on a mix of single- and multi-word named entities as well as a specific multi-word named entity test set. Our proposed gaBERT model with the implementation of random data augmentation and a conditional random fields layer demonstrates significant performance improvements over baseline models, alternative architectures, and multilingual models, achieving an F1 score of 76.52. This study contributes to advancing Irish language technologies and supporting Irish language digital resources, providing a basis for Irish NER and identification of other MWE types.
%R 10.18653/v1/2025.mwe-1.9
%U https://aclanthology.org/2025.mwe-1.9/
%U https://doi.org/10.18653/v1/2025.mwe-1.9
%P 82-96
Markdown (Informal)
[Named Entity Recognition for the Irish Language](https://aclanthology.org/2025.mwe-1.9/) (Adkins et al., MWE 2025)
ACL
- Jane Adkins, Hugo Collins, Joachim Wagner, Abigail Walsh, and Brian Davis. 2025. Named Entity Recognition for the Irish Language. In Proceedings of the 21st Workshop on Multiword Expressions (MWE 2025), pages 82–96, Albuquerque, New Mexico, U.S.A. Association for Computational Linguistics.