@inproceedings{ridha-sakti-2024-refining,
title = "Refining rt{MRI} Landmark-Based Vocal Tract Contour Labels with {FCN}-Based Smoothing and Point-to-Curve Projection",
author = "Ridha, Mushaffa Rasyid and
Sakti, Sakriani",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1204",
pages = "13796--13802",
abstract = "Advanced real-time Magnetic Resonance Imaging (rtMRI) enables researchers to study dynamic articulatory movements during speech production with high temporal resolution. However, accurately outlining articulator contours in high-frame-rate rtMRI presents challenges due to data scalability and image quality issues, making manual and automatic labeling difficult. The widely used publicly available USC-TIMIT dataset offers rtMRI data with landmark-based contour labels derived from unsupervised region segmentation using spatial frequency domain representation and gradient descent optimization. Unfortunately, occasional labeling errors exist, and many contour detection methods were trained and tested based on this ground truth, which is not purely a gold label, with the resulting contour data largely remaining undisclosed to the public. This paper offers a refinement of landmark-based vocal-tract contour labels by employing outlier removal, full convolutional network (FCN)-based smoothing, and a landmark point-to-edge curve projection technique. Since there is no established ground truth label, we evaluate the quality of the new labels through subjective assessments of several contour areas, comparing them to the existing data labels.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ridha-sakti-2024-refining">
<titleInfo>
<title>Refining rtMRI Landmark-Based Vocal Tract Contour Labels with FCN-Based Smoothing and Point-to-Curve Projection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mushaffa</namePart>
<namePart type="given">Rasyid</namePart>
<namePart type="family">Ridha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Advanced real-time Magnetic Resonance Imaging (rtMRI) enables researchers to study dynamic articulatory movements during speech production with high temporal resolution. However, accurately outlining articulator contours in high-frame-rate rtMRI presents challenges due to data scalability and image quality issues, making manual and automatic labeling difficult. The widely used publicly available USC-TIMIT dataset offers rtMRI data with landmark-based contour labels derived from unsupervised region segmentation using spatial frequency domain representation and gradient descent optimization. Unfortunately, occasional labeling errors exist, and many contour detection methods were trained and tested based on this ground truth, which is not purely a gold label, with the resulting contour data largely remaining undisclosed to the public. This paper offers a refinement of landmark-based vocal-tract contour labels by employing outlier removal, full convolutional network (FCN)-based smoothing, and a landmark point-to-edge curve projection technique. Since there is no established ground truth label, we evaluate the quality of the new labels through subjective assessments of several contour areas, comparing them to the existing data labels.</abstract>
<identifier type="citekey">ridha-sakti-2024-refining</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1204</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>13796</start>
<end>13802</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Refining rtMRI Landmark-Based Vocal Tract Contour Labels with FCN-Based Smoothing and Point-to-Curve Projection
%A Ridha, Mushaffa Rasyid
%A Sakti, Sakriani
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ridha-sakti-2024-refining
%X Advanced real-time Magnetic Resonance Imaging (rtMRI) enables researchers to study dynamic articulatory movements during speech production with high temporal resolution. However, accurately outlining articulator contours in high-frame-rate rtMRI presents challenges due to data scalability and image quality issues, making manual and automatic labeling difficult. The widely used publicly available USC-TIMIT dataset offers rtMRI data with landmark-based contour labels derived from unsupervised region segmentation using spatial frequency domain representation and gradient descent optimization. Unfortunately, occasional labeling errors exist, and many contour detection methods were trained and tested based on this ground truth, which is not purely a gold label, with the resulting contour data largely remaining undisclosed to the public. This paper offers a refinement of landmark-based vocal-tract contour labels by employing outlier removal, full convolutional network (FCN)-based smoothing, and a landmark point-to-edge curve projection technique. Since there is no established ground truth label, we evaluate the quality of the new labels through subjective assessments of several contour areas, comparing them to the existing data labels.
%U https://aclanthology.org/2024.lrec-main.1204
%P 13796-13802
Markdown (Informal)
[Refining rtMRI Landmark-Based Vocal Tract Contour Labels with FCN-Based Smoothing and Point-to-Curve Projection](https://aclanthology.org/2024.lrec-main.1204) (Ridha & Sakti, LREC-COLING 2024)
ACL