@inproceedings{doyle-mccrae-2024-developing,
title = "Developing a Part-of-speech Tagger for Diplomatically Edited {O}ld {I}rish Text",
author = "Doyle, Adrian and
McCrae, John P.",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lt4hala-1.2",
pages = "11--21",
abstract = "POS-tagging is typically considered a fundamental text preprocessing task, with a variety of downstream NLP tasks and techniques being dependent on the availability of POS-tagged corpora. As such, POS-taggers are important precursors to further NLP tasks, and their accuracy can impact the potential accuracy of these dependent tasks. While a variety of POS-tagging methods have been developed which work well with modern languages, historical languages present orthographic and editorial challenges which require special attention. The effectiveness of POS-taggers developed for modern languages is reduced when applied to Old Irish, with its comparatively complex orthography and morphology. This paper examines some of the obstacles to POS-tagging Old Irish text, and shows that inconsistencies between extant annotated corpora reduce the quantity of data available for use in training POS-taggers. The development of a multi-layer neural network model for POS-tagging Old Irish text is described, and an experiment is detailed which demonstrates that this model outperforms a variety of off-the-shelf POS-taggers. Moreover, this model sets a new benchmark for POS-tagging diplomatically edited Old Irish text.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="doyle-mccrae-2024-developing">
<titleInfo>
<title>Developing a Part-of-speech Tagger for Diplomatically Edited Old Irish Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adrian</namePart>
<namePart type="family">Doyle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">P</namePart>
<namePart type="family">McCrae</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>POS-tagging is typically considered a fundamental text preprocessing task, with a variety of downstream NLP tasks and techniques being dependent on the availability of POS-tagged corpora. As such, POS-taggers are important precursors to further NLP tasks, and their accuracy can impact the potential accuracy of these dependent tasks. While a variety of POS-tagging methods have been developed which work well with modern languages, historical languages present orthographic and editorial challenges which require special attention. The effectiveness of POS-taggers developed for modern languages is reduced when applied to Old Irish, with its comparatively complex orthography and morphology. This paper examines some of the obstacles to POS-tagging Old Irish text, and shows that inconsistencies between extant annotated corpora reduce the quantity of data available for use in training POS-taggers. The development of a multi-layer neural network model for POS-tagging Old Irish text is described, and an experiment is detailed which demonstrates that this model outperforms a variety of off-the-shelf POS-taggers. Moreover, this model sets a new benchmark for POS-tagging diplomatically edited Old Irish text.</abstract>
<identifier type="citekey">doyle-mccrae-2024-developing</identifier>
<location>
<url>https://aclanthology.org/2024.lt4hala-1.2</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>11</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing a Part-of-speech Tagger for Diplomatically Edited Old Irish Text
%A Doyle, Adrian
%A McCrae, John P.
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ LREC-COLING-2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F doyle-mccrae-2024-developing
%X POS-tagging is typically considered a fundamental text preprocessing task, with a variety of downstream NLP tasks and techniques being dependent on the availability of POS-tagged corpora. As such, POS-taggers are important precursors to further NLP tasks, and their accuracy can impact the potential accuracy of these dependent tasks. While a variety of POS-tagging methods have been developed which work well with modern languages, historical languages present orthographic and editorial challenges which require special attention. The effectiveness of POS-taggers developed for modern languages is reduced when applied to Old Irish, with its comparatively complex orthography and morphology. This paper examines some of the obstacles to POS-tagging Old Irish text, and shows that inconsistencies between extant annotated corpora reduce the quantity of data available for use in training POS-taggers. The development of a multi-layer neural network model for POS-tagging Old Irish text is described, and an experiment is detailed which demonstrates that this model outperforms a variety of off-the-shelf POS-taggers. Moreover, this model sets a new benchmark for POS-tagging diplomatically edited Old Irish text.
%U https://aclanthology.org/2024.lt4hala-1.2
%P 11-21
Markdown (Informal)
[Developing a Part-of-speech Tagger for Diplomatically Edited Old Irish Text](https://aclanthology.org/2024.lt4hala-1.2) (Doyle & McCrae, LT4HALA-WS 2024)
ACL