% Conference paper in the ICON 2024 proceedings (ACL Anthology record 2024.icon-1.32).
% Braces inside title/booktitle protect acronyms and proper nouns from style recasing.
@inproceedings{thinakaran-etal-2024-utilizing,
  title     = {Utilizing {POS}-Driven Pitch Contour Analysis for Enhanced {Tamil} Text-to-Speech Synthesis},
  author    = {Thinakaran, Preethi and
               Gladston, Anushiya Rachel and
               Vijayalakshmi, P and
               Nagarajan, T and
               Muthuramalingam, Malarvizhi and
               S, Sooriya},
  editor    = {Lalitha Devi, Sobha and
               Arora, Karunesh},
  booktitle = {Proceedings of the 21st International Conference on Natural Language Processing ({ICON})},
  month     = dec,
  year      = {2024},
  address   = {AU-KBC Research Centre, Chennai, India},
  publisher = {NLP Association of India (NLPAI)},
  url       = {https://aclanthology.org/2024.icon-1.32/},
  pages     = {269--273},
  abstract  = {A novel approach to text-to-speech synthesis that integrates pitch contour labels derived from the highest occurrence analysis for each Part-of-Speech (POS) tag. Using the Stanford POS Tagger, grammatical tags are assigned to words, and the most frequently occurring pitch contour labels associated with these tags are analyzed, focusing on both unigram and bigram statistics. The primary goal is to identify the pitch contour for each POS tag based on its frequency of occurrence. These pitch contour labels are incorporated into the output of the synthesized waveform using the TD-PSOLA (Time Domain Pitch Synchronous Overlap and Add) signal processing algorithm. The resulting waveform is evaluated using Mean Opinion Scores (MOS), demonstrating significant enhancements in quality and producing a prosodically rich synthetic speech.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for the paper with citekey thinakaran-etal-2024-utilizing -->
  <mods ID="thinakaran-etal-2024-utilizing">
    <titleInfo>
      <title>Utilizing POS-Driven Pitch Contour Analysis for Enhanced Tamil Text-to-Speech Synthesis</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Preethi</namePart>
      <namePart type="family">Thinakaran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anushiya</namePart>
      <namePart type="given">Rachel</namePart>
      <namePart type="family">Gladston</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">P</namePart>
      <namePart type="family">Vijayalakshmi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">T</namePart>
      <namePart type="family">Nagarajan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Malarvizhi</namePart>
      <namePart type="family">Muthuramalingam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sooriya</namePart>
      <namePart type="family">S</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 21st International Conference on Natural Language Processing (ICON)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sobha</namePart>
        <namePart type="family">Lalitha Devi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karunesh</namePart>
        <namePart type="family">Arora</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>NLP Association of India (NLPAI)</publisher>
        <place>
          <placeTerm type="text">AU-KBC Research Centre, Chennai, India</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A novel approach to text-to-speech synthesis that integrates pitch contour labels derived from the highest occurrence analysis for each Part-of-Speech (POS) tag. Using the Stanford POS Tagger, grammatical tags are assigned to words, and the most frequently occurring pitch contour labels associated with these tags are analyzed, focusing on both unigram and bigram statistics. The primary goal is to identify the pitch contour for each POS tag based on its frequency of occurrence. These pitch contour labels are incorporated into the output of the synthesized waveform using the TD-PSOLA (Time Domain Pitch Synchronous Overlap and Add) signal processing algorithm. The resulting waveform is evaluated using Mean Opinion Scores (MOS), demonstrating significant enhancements in quality and producing a prosodically rich synthetic speech.</abstract>
    <identifier type="citekey">thinakaran-etal-2024-utilizing</identifier>
    <location>
      <url>https://aclanthology.org/2024.icon-1.32/</url>
    </location>
    <!-- Issue date and page extent of the paper -->
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>269</start>
        <end>273</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Utilizing POS-Driven Pitch Contour Analysis for Enhanced Tamil Text-to-Speech Synthesis
%A Thinakaran, Preethi
%A Gladston, Anushiya Rachel
%A Vijayalakshmi, P.
%A Nagarajan, T.
%A Muthuramalingam, Malarvizhi
%A S, Sooriya
%Y Lalitha Devi, Sobha
%Y Arora, Karunesh
%S Proceedings of the 21st International Conference on Natural Language Processing (ICON)
%D 2024
%8 December
%I NLP Association of India (NLPAI)
%C AU-KBC Research Centre, Chennai, India
%F thinakaran-etal-2024-utilizing
%X A novel approach to text-to-speech synthesis that integrates pitch contour labels derived from the highest occurrence analysis for each Part-of-Speech (POS) tag. Using the Stanford POS Tagger, grammatical tags are assigned to words, and the most frequently occurring pitch contour labels associated with these tags are analyzed, focusing on both unigram and bigram statistics. The primary goal is to identify the pitch contour for each POS tag based on its frequency of occurrence. These pitch contour labels are incorporated into the output of the synthesized waveform using the TD-PSOLA (Time Domain Pitch Synchronous Overlap and Add) signal processing algorithm. The resulting waveform is evaluated using Mean Opinion Scores (MOS), demonstrating significant enhancements in quality and producing a prosodically rich synthetic speech.
%U https://aclanthology.org/2024.icon-1.32/
%P 269-273
Markdown (Informal)
[Utilizing POS-Driven Pitch Contour Analysis for Enhanced Tamil Text-to-Speech Synthesis](https://aclanthology.org/2024.icon-1.32/) (Thinakaran et al., ICON 2024)
ACL