@inproceedings{finn-etal-2022-annotating,
title = "Annotating {``}Particles{''} in Multiword Expressions in te reo {M}{\=a}ori for a Part-of-Speech Tagger",
author = "Finn, Aoife and
Duncan, Suzanne and
Jones, Peter-Lucas and
Leoni, Gianna and
Mahelona, Keoni",
editor = "Bhatia, Archna and
Cook, Paul and
Taslimipoor, Shiva and
Garcia, Marcos and
Ramisch, Carlos",
booktitle = "Proceedings of the 18th Workshop on Multiword Expressions @LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.mwe-1.10",
pages = "67--74",
abstract = "This paper discusses the development of a Part-of-Speech tagger for te reo M{\=a}ori, which is the Indigenous language of Aotearoa, also known as New Zealand. Te reo M{\=a}ori is a particularly analytical and polysemic language. A word class called {``}particles{''} is introduced; they are small multi-functional words with many meanings, for example {\=e}, ai, noa, rawa, mai, an{\=o} and koa. These {``}particles{''} are reflective of the analytical and polysemous nature of te reo M{\=a}ori. They frequently occur both singularly and also in multiword expressions, including time adverbial phrases. The paper illustrates the challenges that they presented to part-of-speech tagging. It also discusses how we overcome these challenges in a way that is appropriate for te reo M{\=a}ori, given its status as an Indigenous language and history of colonisation. This includes a discussion of the importance of accurately reflecting the conceptualization of te reo M{\=a}ori. And how this involved making no linguistic presumptions, and of eliciting faithful judgements from speakers, in a way that is uninfluenced by linguistic terminology.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="finn-etal-2022-annotating">
<titleInfo>
<title>Annotating “Particles” in Multiword Expressions in te reo Māori for a Part-of-Speech Tagger</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aoife</namePart>
<namePart type="family">Finn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzanne</namePart>
<namePart type="family">Duncan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter-Lucas</namePart>
<namePart type="family">Jones</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gianna</namePart>
<namePart type="family">Leoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keoni</namePart>
<namePart type="family">Mahelona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Workshop on Multiword Expressions @LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Archna</namePart>
<namePart type="family">Bhatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Cook</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="family">Taslimipoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Ramisch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper discusses the development of a Part-of-Speech tagger for te reo Māori, which is the Indigenous language of Aotearoa, also known as New Zealand. Te reo Māori is a particularly analytical and polysemic language. A word class called “particles” is introduced; they are small multi-functional words with many meanings, for example ē, ai, noa, rawa, mai, anō and koa. These “particles” are reflective of the analytical and polysemous nature of te reo Māori. They frequently occur both singularly and also in multiword expressions, including time adverbial phrases. The paper illustrates the challenges that they presented to part-of-speech tagging. It also discusses how we overcome these challenges in a way that is appropriate for te reo Māori, given its status as an Indigenous language and history of colonisation. This includes a discussion of the importance of accurately reflecting the conceptualization of te reo Māori. And how this involved making no linguistic presumptions, and of eliciting faithful judgements from speakers, in a way that is uninfluenced by linguistic terminology.</abstract>
<identifier type="citekey">finn-etal-2022-annotating</identifier>
<location>
<url>https://aclanthology.org/2022.mwe-1.10</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>67</start>
<end>74</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Annotating “Particles” in Multiword Expressions in te reo Māori for a Part-of-Speech Tagger
%A Finn, Aoife
%A Duncan, Suzanne
%A Jones, Peter-Lucas
%A Leoni, Gianna
%A Mahelona, Keoni
%Y Bhatia, Archna
%Y Cook, Paul
%Y Taslimipoor, Shiva
%Y Garcia, Marcos
%Y Ramisch, Carlos
%S Proceedings of the 18th Workshop on Multiword Expressions @LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F finn-etal-2022-annotating
%X This paper discusses the development of a Part-of-Speech tagger for te reo Māori, which is the Indigenous language of Aotearoa, also known as New Zealand. Te reo Māori is a particularly analytical and polysemic language. A word class called “particles” is introduced; they are small multi-functional words with many meanings, for example ē, ai, noa, rawa, mai, anō and koa. These “particles” are reflective of the analytical and polysemous nature of te reo Māori. They frequently occur both singularly and also in multiword expressions, including time adverbial phrases. The paper illustrates the challenges that they presented to part-of-speech tagging. It also discusses how we overcome these challenges in a way that is appropriate for te reo Māori, given its status as an Indigenous language and history of colonisation. This includes a discussion of the importance of accurately reflecting the conceptualization of te reo Māori. And how this involved making no linguistic presumptions, and of eliciting faithful judgements from speakers, in a way that is uninfluenced by linguistic terminology.
%U https://aclanthology.org/2022.mwe-1.10
%P 67-74
Markdown (Informal)
[Annotating “Particles” in Multiword Expressions in te reo Māori for a Part-of-Speech Tagger](https://aclanthology.org/2022.mwe-1.10) (Finn et al., MWE 2022)
ACL