@inproceedings{cusenza-coltekin-2024-nlp,
title = {{NLP} for Arb{\"e}resh: How an Endangered Language Learns to Write in the 21st Century},
author = {Cusenza, Giulio and
{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
editor = "Melero, Maite and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.sigul-1.30",
pages = "252--256",
abstract = {Societies are becoming more and more connected, and minority languages often find themselves helpless against the advent of the digital age, with their speakers having to regularly turn to other languages for written communication. This work introduces the case of Arb{\"e}resh, a southern Italian language related to Albanian. It presents the very first machine-readable Arb{\"e}resh data, collected through a web campaign, and describes a set of tools developed to enable the Arb{\"e}resh people to learn how to write their language, including a spellchecker, a conjugator, a numeral generator, and an interactive platform to learn Arb{\"e}resh spelling. A comprehensive web application was set up to make these tools available to the public, as well as to collect further data through them. This method can be replicated to help revive other minority languages in a situation similar to Arb{\"e}resh{'}s. The main challenges of the process were the extremely low-resource setting and the variability of Arb{\"e}resh dialects.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cusenza-coltekin-2024-nlp">
<titleInfo>
<title>NLP for Arbëresh: How an Endangered Language Learns to Write in the 21st Century</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giulio</namePart>
<namePart type="family">Cusenza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maite</namePart>
<namePart type="family">Melero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Societies are becoming more and more connected, and minority languages often find themselves helpless against the advent of the digital age, with their speakers having to regularly turn to other languages for written communication. This work introduces the case of Arbëresh, a southern Italian language related to Albanian. It presents the very first machine-readable Arbëresh data, collected through a web campaign, and describes a set of tools developed to enable the Arbëresh people to learn how to write their language, including a spellchecker, a conjugator, a numeral generator, and an interactive platform to learn Arbëresh spelling. A comprehensive web application was set up to make these tools available to the public, as well as to collect further data through them. This method can be replicated to help revive other minority languages in a situation similar to Arbëresh’s. The main challenges of the process were the extremely low-resource setting and the variability of Arbëresh dialects.</abstract>
<identifier type="citekey">cusenza-coltekin-2024-nlp</identifier>
<location>
<url>https://aclanthology.org/2024.sigul-1.30</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>252</start>
<end>256</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NLP for Arbëresh: How an Endangered Language Learns to Write in the 21st Century
%A Cusenza, Giulio
%A Çöltekin, Çağrı
%Y Melero, Maite
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F cusenza-coltekin-2024-nlp
%X Societies are becoming more and more connected, and minority languages often find themselves helpless against the advent of the digital age, with their speakers having to regularly turn to other languages for written communication. This work introduces the case of Arbëresh, a southern Italian language related to Albanian. It presents the very first machine-readable Arbëresh data, collected through a web campaign, and describes a set of tools developed to enable the Arbëresh people to learn how to write their language, including a spellchecker, a conjugator, a numeral generator, and an interactive platform to learn Arbëresh spelling. A comprehensive web application was set up to make these tools available to the public, as well as to collect further data through them. This method can be replicated to help revive other minority languages in a situation similar to Arbëresh’s. The main challenges of the process were the extremely low-resource setting and the variability of Arbëresh dialects.
%U https://aclanthology.org/2024.sigul-1.30
%P 252-256
Markdown (Informal)
[NLP for Arbëresh: How an Endangered Language Learns to Write in the 21st Century](https://aclanthology.org/2024.sigul-1.30) (Cusenza & Çöltekin, SIGUL-WS 2024)
ACL