@inproceedings{prys-watkins-2022-evaluation,
title = "Evaluation of Three {W}elsh Language {POS} Taggers",
author = "Prys, Gruffudd and
Watkins, Gareth",
editor = "Fransen, Theodorus and
Lamb, William and
Prys, Delyth",
booktitle = "Proceedings of the 4th Celtic Language Technology Workshop within LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.cltw-1.5",
pages = "30--39",
abstract = "In this paper we describe our quantitative and qualitative evaluation of three Welsh language Part of Speech (POS) taggers. Following an introductory section, we explore some of the issues which face POS taggers, discuss the state of the art in English language tagging, and describe the three Welsh language POS taggers that will be evaluated in this paper, namely WNLT2, CyTag and TagTeg. In section 3 we describe the challenges involved in evaluating POS taggers which make use of different tagsets, and introduce our mapping of the taggers{'} individual tagsets to an Intermediate Tagset used to facilitate their comparative evaluation. Section 4 introduces our benchmarking corpus as an important component of our methodology. In section 5 we describe how the inconsistencies in text tokenization between the different taggers present an issue when undertaking such evaluations, and discuss the method used to overcome this complication. Section 6 illustrates how we annotated the benchmark corpus, while section 7 describes the scoring method used. Section 8 provides an in-depth analysis of the results, and a summary of the work is presented in the conclusion found in section 9. Keywords: POS Tagger, Welsh, Evaluation, Machine Learning",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="prys-watkins-2022-evaluation">
<titleInfo>
<title>Evaluation of Three Welsh Language POS Taggers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gruffudd</namePart>
<namePart type="family">Prys</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gareth</namePart>
<namePart type="family">Watkins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Celtic Language Technology Workshop within LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Theodorus</namePart>
<namePart type="family">Fransen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lamb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delyth</namePart>
<namePart type="family">Prys</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe our quantitative and qualitative evaluation of three Welsh language Part of Speech (POS) taggers. Following an introductory section, we explore some of the issues which face POS taggers, discuss the state of the art in English language tagging, and describe the three Welsh language POS taggers that will be evaluated in this paper, namely WNLT2, CyTag and TagTeg. In section 3 we describe the challenges involved in evaluating POS taggers which make use of different tagsets, and introduce our mapping of the taggers’ individual tagsets to an Intermediate Tagset used to facilitate their comparative evaluation. Section 4 introduces our benchmarking corpus as an important component of our methodology. In section 5 we describe how the inconsistencies in text tokenization between the different taggers present an issue when undertaking such evaluations, and discuss the method used to overcome this complication. Section 6 illustrates how we annotated the benchmark corpus, while section 7 describes the scoring method used. Section 8 provides an in-depth analysis of the results, and a summary of the work is presented in the conclusion found in section 9. Keywords: POS Tagger, Welsh, Evaluation, Machine Learning</abstract>
<identifier type="citekey">prys-watkins-2022-evaluation</identifier>
<location>
<url>https://aclanthology.org/2022.cltw-1.5</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>30</start>
<end>39</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation of Three Welsh Language POS Taggers
%A Prys, Gruffudd
%A Watkins, Gareth
%Y Fransen, Theodorus
%Y Lamb, William
%Y Prys, Delyth
%S Proceedings of the 4th Celtic Language Technology Workshop within LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F prys-watkins-2022-evaluation
%X In this paper we describe our quantitative and qualitative evaluation of three Welsh language Part of Speech (POS) taggers. Following an introductory section, we explore some of the issues which face POS taggers, discuss the state of the art in English language tagging, and describe the three Welsh language POS taggers that will be evaluated in this paper, namely WNLT2, CyTag and TagTeg. In section 3 we describe the challenges involved in evaluating POS taggers which make use of different tagsets, and introduce our mapping of the taggers’ individual tagsets to an Intermediate Tagset used to facilitate their comparative evaluation. Section 4 introduces our benchmarking corpus as an important component of our methodology. In section 5 we describe how the inconsistencies in text tokenization between the different taggers present an issue when undertaking such evaluations, and discuss the method used to overcome this complication. Section 6 illustrates how we annotated the benchmark corpus, while section 7 describes the scoring method used. Section 8 provides an in-depth analysis of the results, and a summary of the work is presented in the conclusion found in section 9. Keywords: POS Tagger, Welsh, Evaluation, Machine Learning
%U https://aclanthology.org/2022.cltw-1.5
%P 30-39
Markdown (Informal)
[Evaluation of Three Welsh Language POS Taggers](https://aclanthology.org/2022.cltw-1.5) (Prys & Watkins, CLTW 2022)
ACL
- Gruffudd Prys and Gareth Watkins. 2022. Evaluation of Three Welsh Language POS Taggers. In Proceedings of the 4th Celtic Language Technology Workshop within LREC2022, pages 30–39, Marseille, France. European Language Resources Association.