@inproceedings{teodorescu-etal-2022-cree,
title = "{C}ree Corpus: A Collection of n{\^e}hiyaw{\^e}win Resources",
author = "Teodorescu, Daniela and
Matalski, Josie and
Lothian, Delaney and
Barbosa, Denilson and
Demmans Epp, Carrie",
editor = "Muresan, Smaranda and
Nakov, Preslav and
Villavicencio, Aline",
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.acl-long.440",
doi = "10.18653/v1/2022.acl-long.440",
pages = "6354--6364",
abstract = "Plains Cree (n{\^e}hiyaw{\^e}win) is an Indigenous language that is spoken in Canada and the USA. It is the most widely spoken dialect of Cree and a morphologically complex language that is polysynthetic, highly inflective, and agglutinative. It is an extremely low resource language, with no existing corpus that is both available and prepared for supporting the development of language technologies. To support n{\^e}hiyaw{\^e}win revitalization and preservation, we developed a corpus covering diverse genres, time periods, and texts for a variety of intended audiences. The data has been verified and cleaned; it is ready for use in developing language technologies for n{\^e}hiyaw{\^e}win. The corpus includes the corresponding English phrases or audio files where available. We demonstrate the utility of the corpus through its community use and its use to build language technologies that can provide the types of support that community members have expressed are desirable. The corpus is available for public use.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="teodorescu-etal-2022-cree">
<titleInfo>
<title>Cree Corpus: A Collection of nêhiyawêwin Resources</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniela</namePart>
<namePart type="family">Teodorescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josie</namePart>
<namePart type="family">Matalski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delaney</namePart>
<namePart type="family">Lothian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denilson</namePart>
<namePart type="family">Barbosa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carrie</namePart>
<namePart type="family">Demmans Epp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Smaranda</namePart>
<namePart type="family">Muresan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Plains Cree (nêhiyawêwin) is an Indigenous language that is spoken in Canada and the USA. It is the most widely spoken dialect of Cree and a morphologically complex language that is polysynthetic, highly inflective, and agglutinative. It is an extremely low resource language, with no existing corpus that is both available and prepared for supporting the development of language technologies. To support nêhiyawêwin revitalization and preservation, we developed a corpus covering diverse genres, time periods, and texts for a variety of intended audiences. The data has been verified and cleaned; it is ready for use in developing language technologies for nêhiyawêwin. The corpus includes the corresponding English phrases or audio files where available. We demonstrate the utility of the corpus through its community use and its use to build language technologies that can provide the types of support that community members have expressed are desirable. The corpus is available for public use.</abstract>
<identifier type="citekey">teodorescu-etal-2022-cree</identifier>
<identifier type="doi">10.18653/v1/2022.acl-long.440</identifier>
<location>
<url>https://aclanthology.org/2022.acl-long.440</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>6354</start>
<end>6364</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cree Corpus: A Collection of nêhiyawêwin Resources
%A Teodorescu, Daniela
%A Matalski, Josie
%A Lothian, Delaney
%A Barbosa, Denilson
%A Demmans Epp, Carrie
%Y Muresan, Smaranda
%Y Nakov, Preslav
%Y Villavicencio, Aline
%S Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F teodorescu-etal-2022-cree
%X Plains Cree (nêhiyawêwin) is an Indigenous language that is spoken in Canada and the USA. It is the most widely spoken dialect of Cree and a morphologically complex language that is polysynthetic, highly inflective, and agglutinative. It is an extremely low resource language, with no existing corpus that is both available and prepared for supporting the development of language technologies. To support nêhiyawêwin revitalization and preservation, we developed a corpus covering diverse genres, time periods, and texts for a variety of intended audiences. The data has been verified and cleaned; it is ready for use in developing language technologies for nêhiyawêwin. The corpus includes the corresponding English phrases or audio files where available. We demonstrate the utility of the corpus through its community use and its use to build language technologies that can provide the types of support that community members have expressed are desirable. The corpus is available for public use.
%R 10.18653/v1/2022.acl-long.440
%U https://aclanthology.org/2022.acl-long.440
%U https://doi.org/10.18653/v1/2022.acl-long.440
%P 6354-6364
Markdown (Informal)
[Cree Corpus: A Collection of nêhiyawêwin Resources](https://aclanthology.org/2022.acl-long.440) (Teodorescu et al., ACL 2022)
ACL
- Daniela Teodorescu, Josie Matalski, Delaney Lothian, Denilson Barbosa, and Carrie Demmans Epp. 2022. Cree Corpus: A Collection of nêhiyawêwin Resources. In Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 6354–6364, Dublin, Ireland. Association for Computational Linguistics.