@inproceedings{clarke-etal-2024-guylingo,
title = "{G}uy{L}ingo: The {R}epublic of {G}uyana Creole Corpora",
author = "Clarke, Christopher and
Daynauth, Roland and
Mars, Jason and
Wilkinson, Charlene and
Devonish, Hubert",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-short.70",
doi = "10.18653/v1/2024.naacl-short.70",
pages = "792--798",
abstract = "While major languages often enjoy substantial attention and resources, the linguistic diversity across the globe encompasses a multitude of smaller, indigenous, and regional languages that lack the same level of computational support. One such region is the Caribbean. While commonly labeled as {``}English speaking{''}, the ex-British Caribbean region consists of a myriad of Creole languages thriving alongside English. In this paper, we present Guylingo: a comprehensive corpus designed for advancing NLP research in the domain of Creolese (Guyanese English-lexicon Creole), the most widely spoken language in the culturally rich nation of Guyana. We first outline our framework for gathering and digitizing this diverse corpus, inclusive of colloquial expressions, idioms, and regional variations in a low-resource language. We then demonstrate the challenges of training and evaluating NLP models for machine translation for Creolese. Lastly, we discuss the unique opportunities presented by recent NLP advancements for accelerating the formal adoption of Creole languages as official languages in the Caribbean.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="clarke-etal-2024-guylingo">
<titleInfo>
<title>GuyLingo: The Republic of Guyana Creole Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Clarke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roland</namePart>
<namePart type="family">Daynauth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="family">Mars</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charlene</namePart>
<namePart type="family">Wilkinson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hubert</namePart>
<namePart type="family">Devonish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While major languages often enjoy substantial attention and resources, the linguistic diversity across the globe encompasses a multitude of smaller, indigenous, and regional languages that lack the same level of computational support. One such region is the Caribbean. While commonly labeled as “English speaking”, the ex-British Caribbean region consists of a myriad of Creole languages thriving alongside English. In this paper, we present Guylingo: a comprehensive corpus designed for advancing NLP research in the domain of Creolese (Guyanese English-lexicon Creole), the most widely spoken language in the culturally rich nation of Guyana. We first outline our framework for gathering and digitizing this diverse corpus, inclusive of colloquial expressions, idioms, and regional variations in a low-resource language. We then demonstrate the challenges of training and evaluating NLP models for machine translation for Creolese. Lastly, we discuss the unique opportunities presented by recent NLP advancements for accelerating the formal adoption of Creole languages as official languages in the Caribbean.</abstract>
<identifier type="citekey">clarke-etal-2024-guylingo</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-short.70</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-short.70</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>792</start>
<end>798</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GuyLingo: The Republic of Guyana Creole Corpora
%A Clarke, Christopher
%A Daynauth, Roland
%A Mars, Jason
%A Wilkinson, Charlene
%A Devonish, Hubert
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F clarke-etal-2024-guylingo
%X While major languages often enjoy substantial attention and resources, the linguistic diversity across the globe encompasses a multitude of smaller, indigenous, and regional languages that lack the same level of computational support. One such region is the Caribbean. While commonly labeled as “English speaking”, the ex-British Caribbean region consists of a myriad of Creole languages thriving alongside English. In this paper, we present Guylingo: a comprehensive corpus designed for advancing NLP research in the domain of Creolese (Guyanese English-lexicon Creole), the most widely spoken language in the culturally rich nation of Guyana. We first outline our framework for gathering and digitizing this diverse corpus, inclusive of colloquial expressions, idioms, and regional variations in a low-resource language. We then demonstrate the challenges of training and evaluating NLP models for machine translation for Creolese. Lastly, we discuss the unique opportunities presented by recent NLP advancements for accelerating the formal adoption of Creole languages as official languages in the Caribbean.
%R 10.18653/v1/2024.naacl-short.70
%U https://aclanthology.org/2024.naacl-short.70
%U https://doi.org/10.18653/v1/2024.naacl-short.70
%P 792-798
Markdown (Informal)
[GuyLingo: The Republic of Guyana Creole Corpora](https://aclanthology.org/2024.naacl-short.70) (Clarke et al., NAACL 2024)
ACL
- Christopher Clarke, Roland Daynauth, Jason Mars, Charlene Wilkinson, and Hubert Devonish. 2024. GuyLingo: The Republic of Guyana Creole Corpora. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 2: Short Papers), pages 792–798, Mexico City, Mexico. Association for Computational Linguistics.