@inproceedings{hiovain-asikainen-etal-2025-worlds,
title = "The world{'}s first {S}outh {S}{\'a}mi {TTS} {--} a revitalisation effort of an endangered language by reviving a legacy voice",
author = "Hiovain-Asikainen, Katri and
Kj{\ae}rstad, Thomas B. and
Kappfjell, Maja Lisa and
Moshagen, Sjur N.",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
Rie{\ss}ler, Michael and
Morooka, Eiaki V. and
Kharlashkin, Lev},
booktitle = "Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages",
month = dec,
year = "2025",
address = "Joensuu, Finland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.iwclul-1.3/",
pages = "12--21",
ISBN = "979-8-89176-360-9",
abstract = "South S{\'a}mi (ISO 639: SMA) is a severely endangered language spoken by the South S{\'a}mi people in Norway and Sweden. Estimates of the number of speakers vary from 500 to 600. Recent advances in speech technology and the general increase in popularity of spoken language and audio content have facilitated the development of modern speech technology tools also for minority languages, such as the S{\'a}mi languages. The current paper documents the development process of the world{'}s first South S{\'a}mi text-to-speech (TTS) system, using only digitized archive materials from 1989{--}1993 as the training material. To reach an end-user suitable quality of the TTS, we have used a neural, end-to-end approach with a rule-based text processing module. The aim of our project is to contribute to the language revitalization by offering tools for language users to use spoken language in new contexts. Since the modern written standard of South S{\'a}mi was established as late as in 1978, the rise of speech technology might encourage language use even for people who are not accustomed to the written standard."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hiovain-asikainen-etal-2025-worlds">
<titleInfo>
<title>The world’s first South Sámi TTS – a revitalisation effort of an endangered language by reviving a legacy voice</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katri</namePart>
<namePart type="family">Hiovain-Asikainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Kjærstad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="given">Lisa</namePart>
<namePart type="family">Kappfjell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sjur</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Moshagen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Rießler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiaki</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Morooka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lev</namePart>
<namePart type="family">Kharlashkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Joensuu, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-360-9</identifier>
</relatedItem>
<abstract>South Sámi (ISO 639: SMA) is a severely endangered language spoken by the South Sámi people in Norway and Sweden. Estimates of the number of speakers vary from 500 to 600. Recent advances in speech technology and the general increase in popularity of spoken language and audio content have facilitated the development of modern speech technology tools also for minority languages, such as the Sámi languages. The current paper documents the development process of the world’s first South Sámi text-to-speech (TTS) system, using only digitized archive materials from 1989–1993 as the training material. To reach an end-user suitable quality of the TTS, we have used a neural, end-to-end approach with a rule-based text processing module. The aim of our project is to contribute to the language revitalization by offering tools for language users to use spoken language in new contexts. Since the modern written standard of South Sámi was established as late as in 1978, the rise of speech technology might encourage language use even for people who are not accustomed to the written standard.</abstract>
<identifier type="citekey">hiovain-asikainen-etal-2025-worlds</identifier>
<location>
<url>https://aclanthology.org/2025.iwclul-1.3/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>12</start>
<end>21</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The world’s first South Sámi TTS – a revitalisation effort of an endangered language by reviving a legacy voice
%A Hiovain-Asikainen, Katri
%A Kjærstad, Thomas B.
%A Kappfjell, Maja Lisa
%A Moshagen, Sjur N.
%Y Hämäläinen, Mika
%Y Rießler, Michael
%Y Morooka, Eiaki V.
%Y Kharlashkin, Lev
%S Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages
%D 2025
%8 December
%I Association for Computational Linguistics
%C Joensuu, Finland
%@ 979-8-89176-360-9
%F hiovain-asikainen-etal-2025-worlds
%X South Sámi (ISO 639: SMA) is a severely endangered language spoken by the South Sámi people in Norway and Sweden. Estimates of the number of speakers vary from 500 to 600. Recent advances in speech technology and the general increase in popularity of spoken language and audio content have facilitated the development of modern speech technology tools also for minority languages, such as the Sámi languages. The current paper documents the development process of the world’s first South Sámi text-to-speech (TTS) system, using only digitized archive materials from 1989–1993 as the training material. To reach an end-user suitable quality of the TTS, we have used a neural, end-to-end approach with a rule-based text processing module. The aim of our project is to contribute to the language revitalization by offering tools for language users to use spoken language in new contexts. Since the modern written standard of South Sámi was established as late as in 1978, the rise of speech technology might encourage language use even for people who are not accustomed to the written standard.
%U https://aclanthology.org/2025.iwclul-1.3/
%P 12-21
Markdown (Informal)
[The world’s first South Sámi TTS – a revitalisation effort of an endangered language by reviving a legacy voice](https://aclanthology.org/2025.iwclul-1.3/) (Hiovain-Asikainen et al., IWCLUL 2025)
ACL