% BibTeX record for a conference paper; field values are kept as given in the source record.
% NOTE(review): address looks like the conference venue rather than the publisher's city
% (the publisher name mentions Tartu) -- confirm before changing; left as given.
@inproceedings{kiissel-etal-2025-estonian,
  title     = {{Estonian} isolated-word text-to-speech synthesiser},
  author    = {Kiissel, Indrek and
               Piits, Liisi and
               Sahkai, Heete and
               Hein, Indrek and
               Ermus, Liis and
               Mihkla, Meelis},
  editor    = {Johansson, Richard and
               Stymne, Sara},
  booktitle = {Proceedings of the Joint 25th {Nordic} Conference on Computational Linguistics and 11th {Baltic} Conference on Human Language Technologies ({NoDaLiDa/Baltic-HLT} 2025)},
  month     = mar,
  year      = {2025},
  address   = {Tallinn, Estonia},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2025.nodalida-1.32/},
  pages     = {302--306},
  isbn      = {978-9908-53-109-0},
  abstract  = {This paper presents the development and evaluation of an Estonian isolated-word text-to-speech (TTS) synthesiser. Unlike conventional TTS systems that convert continuous text into speech, this system focuses on the synthesis of isolated words, which is crucial for applications such as pronunciation training, speech therapy, and (learners') dictionaries. The system addresses two key challenges: generating natural prosody for isolated words and context-free disambiguation of homographs. We conducted a perception test to evaluate the performance of the TTS system in terms of pronunciation accuracy. We used 16 pairs of homographs that differ in palatalisation and 16 pairs of homographs that differ in quantity. Given that all the test items were correctly recognised by a majority of the evaluators, the performance of the synthesiser can be considered very good.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record for the paper identified by the mods ID below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="kiissel-etal-2025-estonian">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Estonian isolated-word text-to-speech synthesiser</title>
    </titleInfo>
    <!-- Authors, one <name> element each, in document order -->
    <name type="personal">
      <namePart type="given">Indrek</namePart>
      <namePart type="family">Kiissel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Liisi</namePart>
      <namePart type="family">Piits</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Heete</namePart>
      <namePart type="family">Sahkai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Indrek</namePart>
      <namePart type="family">Hein</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Liis</namePart>
      <namePart type="family">Ermus</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Meelis</namePart>
      <namePart type="family">Mihkla</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2025-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Richard</namePart>
        <namePart type="family">Johansson</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Stymne</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>University of Tartu Library</publisher>
        <place>
          <placeTerm type="text">Tallinn, Estonia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">978-9908-53-109-0</identifier>
    </relatedItem>
    <abstract>This paper presents the development and evaluation of an Estonian isolated-word text-to-speech (TTS) synthesiser. Unlike conventional TTS systems that convert continuous text into speech, this system focuses on the synthesis of isolated words, which is crucial for applications such as pronunciation training, speech therapy, and (learners’) dictionaries. The system addresses two key challenges: generating natural prosody for isolated words and context-free disambiguation of homographs. We conducted a perception test to evaluate the performance of the TTS system in terms of pronunciation accuracy. We used 16 pairs of homographs that differ in palatalisation and 16 pairs of homographs that differ in quantity. Given that all the test items were correctly recognised by a majority of the evaluators, the performance of the synthesiser can be considered very good.</abstract>
    <!-- Citation key; same value as the mods ID attribute above -->
    <identifier type="citekey">kiissel-etal-2025-estonian</identifier>
    <location>
      <url>https://aclanthology.org/2025.nodalida-1.32/</url>
    </location>
    <!-- Page extent of the paper, with the same year-month date as dateIssued -->
    <part>
      <date>2025-03</date>
      <extent unit="page">
        <start>302</start>
        <end>306</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Estonian isolated-word text-to-speech synthesiser
%A Kiissel, Indrek
%A Piits, Liisi
%A Sahkai, Heete
%A Hein, Indrek
%A Ermus, Liis
%A Mihkla, Meelis
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F kiissel-etal-2025-estonian
%X This paper presents the development and evaluation of an Estonian isolated-word text-to-speech (TTS) synthesiser. Unlike conventional TTS systems that convert continuous text into speech, this system focuses on the synthesis of isolated words, which is crucial for applications such as pronunciation training, speech therapy, and (learners’) dictionaries. The system addresses two key challenges: generating natural prosody for isolated words and context-free disambiguation of homographs. We conducted a perception test to evaluate the performance of the TTS system in terms of pronunciation accuracy. We used 16 pairs of homographs that differ in palatalisation and 16 pairs of homographs that differ in quantity. Given that all the test items were correctly recognised by a majority of the evaluators, the performance of the synthesiser can be considered very good.
%U https://aclanthology.org/2025.nodalida-1.32/
%P 302-306
Markdown (Informal)
[Estonian isolated-word text-to-speech synthesiser](https://aclanthology.org/2025.nodalida-1.32/) (Kiissel et al., NoDaLiDa 2025)
ACL
- Indrek Kiissel, Liisi Piits, Heete Sahkai, Indrek Hein, Liis Ermus, and Meelis Mihkla. 2025. Estonian isolated-word text-to-speech synthesiser. In Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025), pages 302–306, Tallinn, Estonia. University of Tartu Library.