@inproceedings{vicente-cheng-2024-language,
title = "Language Identification of {P}hilippine Creole {S}panish: Discriminating {C}havacano From Related Languages",
author = "Vicente, Aileen Joan and
Cheng, Charibeth",
editor = {Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Zampieri, Marcos and
Nakov, Preslav and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.vardial-1.16",
doi = "10.18653/v1/2024.vardial-1.16",
pages = "188--196",
abstract = "Chavacano is a Spanish Creole widely spoken in the southern regions of the Philippines. It is one of the many Philippine languages yet to be studied computationally. This paper presents the development of a language identification model of Chavacano to distinguish it from languages that influence its creolization using character convolutional networks. Unlike studies that discriminated similar languages based on geographical proximity, this paper reports a similarity focused on the creolization of a language. We established the similarity of Chavacano and its related languages, Spanish, Portuguese, Cebuano, and Hiligaynon, from the number of common words in the corpus for all languages. We report an accuracy of 93{\%} for the model generated using ten filters with a filter width of 5. The training experiments reveal that increasing the filter width, number of filters, or training epochs is unnecessary even if the accuracy increases because the generated models present irregular learning behavior or may have already been overfitted. This study also demonstrates that the character features extracted from convolutional neural networks, similar to n-grams, are sufficient in identifying Chavacano. Future work on the language identification of Chavacano includes improving classification accuracy for short or code-switched texts for practical applications such as social media sensors for disaster response and management.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vicente-cheng-2024-language">
<titleInfo>
<title>Language Identification of Philippine Creole Spanish: Discriminating Chavacano From Related Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aileen</namePart>
<namePart type="given">Joan</namePart>
<namePart type="family">Vicente</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charibeth</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Chavacano is a Spanish Creole widely spoken in the southern regions of the Philippines. It is one of the many Philippine languages yet to be studied computationally. This paper presents the development of a language identification model of Chavacano to distinguish it from languages that influence its creolization using character convolutional networks. Unlike studies that discriminated similar languages based on geographical proximity, this paper reports a similarity focused on the creolization of a language. We established the similarity of Chavacano and its related languages, Spanish, Portuguese, Cebuano, and Hiligaynon, from the number of common words in the corpus for all languages. We report an accuracy of 93% for the model generated using ten filters with a filter width of 5. The training experiments reveal that increasing the filter width, number of filters, or training epochs is unnecessary even if the accuracy increases because the generated models present irregular learning behavior or may have already been overfitted. This study also demonstrates that the character features extracted from convolutional neural networks, similar to n-grams, are sufficient in identifying Chavacano. Future work on the language identification of Chavacano includes improving classification accuracy for short or code-switched texts for practical applications such as social media sensors for disaster response and management.</abstract>
<identifier type="citekey">vicente-cheng-2024-language</identifier>
<identifier type="doi">10.18653/v1/2024.vardial-1.16</identifier>
<location>
<url>https://aclanthology.org/2024.vardial-1.16</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>188</start>
<end>196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Identification of Philippine Creole Spanish: Discriminating Chavacano From Related Languages
%A Vicente, Aileen Joan
%A Cheng, Charibeth
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%S Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F vicente-cheng-2024-language
%X Chavacano is a Spanish Creole widely spoken in the southern regions of the Philippines. It is one of the many Philippine languages yet to be studied computationally. This paper presents the development of a language identification model of Chavacano to distinguish it from languages that influence its creolization using character convolutional networks. Unlike studies that discriminated similar languages based on geographical proximity, this paper reports a similarity focused on the creolization of a language. We established the similarity of Chavacano and its related languages, Spanish, Portuguese, Cebuano, and Hiligaynon, from the number of common words in the corpus for all languages. We report an accuracy of 93% for the model generated using ten filters with a filter width of 5. The training experiments reveal that increasing the filter width, number of filters, or training epochs is unnecessary even if the accuracy increases because the generated models present irregular learning behavior or may have already been overfitted. This study also demonstrates that the character features extracted from convolutional neural networks, similar to n-grams, are sufficient in identifying Chavacano. Future work on the language identification of Chavacano includes improving classification accuracy for short or code-switched texts for practical applications such as social media sensors for disaster response and management.
%R 10.18653/v1/2024.vardial-1.16
%U https://aclanthology.org/2024.vardial-1.16
%U https://doi.org/10.18653/v1/2024.vardial-1.16
%P 188-196
Markdown (Informal)
[Language Identification of Philippine Creole Spanish: Discriminating Chavacano From Related Languages](https://aclanthology.org/2024.vardial-1.16) (Vicente & Cheng, VarDial-WS 2024)
ACL