@inproceedings{hagemeijer-etal-2014-gulf,
title = "The {G}ulf of {G}uinea Creole Corpora",
author = "Hagemeijer, Tjerk and
G{\'e}n{\'e}reux, Michel and
Hendrickx, Iris and
Mendes, Am{\'a}lia and
Tiny, Abigail and
Zamora, Armando",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/438_Paper.pdf",
pages = "523--529",
abstract = "We present the process of building linguistic corpora of the Portuguese-related Gulf of Guinea creoles, a cluster of four historically related languages: Santome, Angolar, Principense and Fa d'Amb{\^o}. We faced the typical difficulties of languages lacking an official status, such as lack of standard spelling, language variation, lack of basic language instruments, and small data sets, which comprise data from the late 19th century to the present. In order to tackle these problems, the compiled written and transcribed spoken data collected during field work trips were adapted to a normalized spelling that was applied to the four languages. For the corpus compilation we followed corpus linguistics standards. We recorded meta data for each file and added morphosyntactic information based on a part-of-speech tag set that was designed to deal with the specificities of these languages. The corpora of three of the four creoles are already available and searchable via an online web interface.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hagemeijer-etal-2014-gulf">
<titleInfo>
<title>The Gulf of Guinea Creole Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tjerk</namePart>
<namePart type="family">Hagemeijer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michel</namePart>
<namePart type="family">Généreux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iris</namePart>
<namePart type="family">Hendrickx</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amália</namePart>
<namePart type="family">Mendes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abigail</namePart>
<namePart type="family">Tiny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Armando</namePart>
<namePart type="family">Zamora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the process of building linguistic corpora of the Portuguese-related Gulf of Guinea creoles, a cluster of four historically related languages: Santome, Angolar, Principense and Fa d'Ambô. We faced the typical difficulties of languages lacking an official status, such as lack of standard spelling, language variation, lack of basic language instruments, and small data sets, which comprise data from the late 19th century to the present. In order to tackle these problems, the compiled written and transcribed spoken data collected during field work trips were adapted to a normalized spelling that was applied to the four languages. For the corpus compilation we followed corpus linguistics standards. We recorded meta data for each file and added morphosyntactic information based on a part-of-speech tag set that was designed to deal with the specificities of these languages. The corpora of three of the four creoles are already available and searchable via an online web interface.</abstract>
<identifier type="citekey">hagemeijer-etal-2014-gulf</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/438_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>523</start>
<end>529</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Gulf of Guinea Creole Corpora
%A Hagemeijer, Tjerk
%A Généreux, Michel
%A Hendrickx, Iris
%A Mendes, Amália
%A Tiny, Abigail
%A Zamora, Armando
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F hagemeijer-etal-2014-gulf
%X We present the process of building linguistic corpora of the Portuguese-related Gulf of Guinea creoles, a cluster of four historically related languages: Santome, Angolar, Principense and Fa d'Ambô. We faced the typical difficulties of languages lacking an official status, such as lack of standard spelling, language variation, lack of basic language instruments, and small data sets, which comprise data from the late 19th century to the present. In order to tackle these problems, the compiled written and transcribed spoken data collected during field work trips were adapted to a normalized spelling that was applied to the four languages. For the corpus compilation we followed corpus linguistics standards. We recorded meta data for each file and added morphosyntactic information based on a part-of-speech tag set that was designed to deal with the specificities of these languages. The corpora of three of the four creoles are already available and searchable via an online web interface.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/438_Paper.pdf
%P 523-529
Markdown (Informal)
[The Gulf of Guinea Creole Corpora](http://www.lrec-conf.org/proceedings/lrec2014/pdf/438_Paper.pdf) (Hagemeijer et al., LREC 2014)
ACL
- Tjerk Hagemeijer, Michel Généreux, Iris Hendrickx, Amália Mendes, Abigail Tiny, and Armando Zamora. 2014. The Gulf of Guinea Creole Corpora. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 523–529, Reykjavik, Iceland. European Language Resources Association (ELRA).