@inproceedings{imperial-etal-2022-baseline,
title = "A Baseline Readability Model for {C}ebuano",
author = "Imperial, Joseph Marvin and
Reyes, Lloyd Lois Antonie and
Ibanez, Michael Antonio and
Sapinit, Ranz and
Hussien, Mohammed",
editor = {Kochmar, Ekaterina and
Burstein, Jill and
Horbach, Andrea and
Laarmann-Quante, Ronja and
Madnani, Nitin and
Tack, Ana{\"i}s and
Yaneva, Victoria and
Yuan, Zheng and
Zesch, Torsten},
booktitle = "Proceedings of the 17th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2022)",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.bea-1.5/",
doi = "10.18653/v1/2022.bea-1.5",
pages = "27--32",
abstract = "In this study, we developed the first baseline readability model for the Cebuano language. Cebuano is the second most-used native language in the Philippines with about 27.5 million speakers. As the baseline, we extracted traditional or surface-based features, syllable patterns based from Cebuano`s documented orthography, and neural embeddings from the multilingual BERT model. Results show that the use of the first two handcrafted linguistic features obtained the best performance trained on an optimized Random Forest model with approximately 87{\%} across all metrics. The feature sets and algorithm used also is similar to previous results in readability assessment for the Filipino language{---}showing potential of crosslingual application. To encourage more work for readability assessment in Philippine languages such as Cebuano, we open-sourced both code and data."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="imperial-etal-2022-baseline">
<titleInfo>
<title>A Baseline Readability Model for Cebuano</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">Marvin</namePart>
<namePart type="family">Imperial</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lloyd</namePart>
<namePart type="given">Lois</namePart>
<namePart type="given">Antonie</namePart>
<namePart type="family">Reyes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="given">Antonio</namePart>
<namePart type="family">Ibanez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranz</namePart>
<namePart type="family">Sapinit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Hussien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Madnani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Torsten</namePart>
<namePart type="family">Zesch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we developed the first baseline readability model for the Cebuano language. Cebuano is the second most-used native language in the Philippines with about 27.5 million speakers. As the baseline, we extracted traditional or surface-based features, syllable patterns based from Cebuano‘s documented orthography, and neural embeddings from the multilingual BERT model. Results show that the use of the first two handcrafted linguistic features obtained the best performance trained on an optimized Random Forest model with approximately 87% across all metrics. The feature sets and algorithm used also is similar to previous results in readability assessment for the Filipino language—showing potential of crosslingual application. To encourage more work for readability assessment in Philippine languages such as Cebuano, we open-sourced both code and data.</abstract>
<identifier type="citekey">imperial-etal-2022-baseline</identifier>
<identifier type="doi">10.18653/v1/2022.bea-1.5</identifier>
<location>
<url>https://aclanthology.org/2022.bea-1.5/</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>27</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Baseline Readability Model for Cebuano
%A Imperial, Joseph Marvin
%A Reyes, Lloyd Lois Antonie
%A Ibanez, Michael Antonio
%A Sapinit, Ranz
%A Hussien, Mohammed
%Y Kochmar, Ekaterina
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Madnani, Nitin
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%Y Zesch, Torsten
%S Proceedings of the 17th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2022)
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F imperial-etal-2022-baseline
%X In this study, we developed the first baseline readability model for the Cebuano language. Cebuano is the second most-used native language in the Philippines with about 27.5 million speakers. As the baseline, we extracted traditional or surface-based features, syllable patterns based from Cebuano‘s documented orthography, and neural embeddings from the multilingual BERT model. Results show that the use of the first two handcrafted linguistic features obtained the best performance trained on an optimized Random Forest model with approximately 87% across all metrics. The feature sets and algorithm used also is similar to previous results in readability assessment for the Filipino language—showing potential of crosslingual application. To encourage more work for readability assessment in Philippine languages such as Cebuano, we open-sourced both code and data.
%R 10.18653/v1/2022.bea-1.5
%U https://aclanthology.org/2022.bea-1.5/
%U https://doi.org/10.18653/v1/2022.bea-1.5
%P 27-32
Markdown (Informal)
[A Baseline Readability Model for Cebuano](https://aclanthology.org/2022.bea-1.5/) (Imperial et al., BEA 2022)
ACL
- Joseph Marvin Imperial, Lloyd Lois Antonie Reyes, Michael Antonio Ibanez, Ranz Sapinit, and Mohammed Hussien. 2022. A Baseline Readability Model for Cebuano. In Proceedings of the 17th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2022), pages 27–32, Seattle, Washington. Association for Computational Linguistics.