@inproceedings{chen-etal-2016-clustering-based,
title = "Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced {ASR}",
author = "Chen, Wenda and
Hasegawa-Johnson, Mark and
Chen, Nancy and
Jyothi, Preethi and
Varshney, Lav",
editor = "Wu, Dekai and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 6th Workshop on South and Southeast {A}sian Natural Language Processing ({WSSANLP}2016)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-3714",
pages = "133--141",
abstract = "Acquiring labeled speech for low-resource languages is a difficult task in the absence of native speakers of the language. One solution to this problem involves collecting speech transcriptions from crowd workers who are foreign or non-native speakers of a given target language. From these mismatched transcriptions, one can derive probabilistic phone transcriptions that are defined over the set of all target language phones using a noisy channel model. This paper extends prior work on deriving probabilistic transcriptions (PTs) from mismatched transcriptions by 1) modelling multilingual channels and 2) introducing a clustering-based phonetic mapping technique to improve the quality of PTs. Mismatched crowdsourcing for multilingual channels has certain properties of projection mapping, e.g., it can be interpreted as a clustering based on singular value decomposition of the segment alignments. To this end, we explore the use of distinctive feature weights, lexical tone confusions, and a two-step clustering algorithm to learn projections of phoneme segments from mismatched multilingual transcriber languages to the target language. We evaluate our techniques using mismatched transcriptions for Cantonese speech acquired from native English and Mandarin speakers. We observe a 5-9{\%} relative reduction in phone error rate for the predicted Cantonese phone transcriptions using our proposed techniques compared with the previous PT method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2016-clustering-based">
<titleInfo>
<title>Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced ASR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenda</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Hasegawa-Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nancy</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preethi</namePart>
<namePart type="family">Jyothi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lav</namePart>
<namePart type="family">Varshney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dekai</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Acquiring labeled speech for low-resource languages is a difficult task in the absence of native speakers of the language. One solution to this problem involves collecting speech transcriptions from crowd workers who are foreign or non-native speakers of a given target language. From these mismatched transcriptions, one can derive probabilistic phone transcriptions that are defined over the set of all target language phones using a noisy channel model. This paper extends prior work on deriving probabilistic transcriptions (PTs) from mismatched transcriptions by 1) modelling multilingual channels and 2) introducing a clustering-based phonetic mapping technique to improve the quality of PTs. Mismatched crowdsourcing for multilingual channels has certain properties of projection mapping, e.g., it can be interpreted as a clustering based on singular value decomposition of the segment alignments. To this end, we explore the use of distinctive feature weights, lexical tone confusions, and a two-step clustering algorithm to learn projections of phoneme segments from mismatched multilingual transcriber languages to the target language. We evaluate our techniques using mismatched transcriptions for Cantonese speech acquired from native English and Mandarin speakers. We observe a 5-9% relative reduction in phone error rate for the predicted Cantonese phone transcriptions using our proposed techniques compared with the previous PT method.</abstract>
<identifier type="citekey">chen-etal-2016-clustering-based</identifier>
<location>
<url>https://aclanthology.org/W16-3714</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>133</start>
<end>141</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced ASR
%A Chen, Wenda
%A Hasegawa-Johnson, Mark
%A Chen, Nancy
%A Jyothi, Preethi
%A Varshney, Lav
%Y Wu, Dekai
%Y Bhattacharyya, Pushpak
%S Proceedings of the 6th Workshop on South and Southeast Asian Natural Language Processing (WSSANLP2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F chen-etal-2016-clustering-based
%X Acquiring labeled speech for low-resource languages is a difficult task in the absence of native speakers of the language. One solution to this problem involves collecting speech transcriptions from crowd workers who are foreign or non-native speakers of a given target language. From these mismatched transcriptions, one can derive probabilistic phone transcriptions that are defined over the set of all target language phones using a noisy channel model. This paper extends prior work on deriving probabilistic transcriptions (PTs) from mismatched transcriptions by 1) modelling multilingual channels and 2) introducing a clustering-based phonetic mapping technique to improve the quality of PTs. Mismatched crowdsourcing for multilingual channels has certain properties of projection mapping, e.g., it can be interpreted as a clustering based on singular value decomposition of the segment alignments. To this end, we explore the use of distinctive feature weights, lexical tone confusions, and a two-step clustering algorithm to learn projections of phoneme segments from mismatched multilingual transcriber languages to the target language. We evaluate our techniques using mismatched transcriptions for Cantonese speech acquired from native English and Mandarin speakers. We observe a 5-9% relative reduction in phone error rate for the predicted Cantonese phone transcriptions using our proposed techniques compared with the previous PT method.
%U https://aclanthology.org/W16-3714
%P 133-141
Markdown (Informal)
[Clustering-based Phonetic Projection in Mismatched Crowdsourcing Channels for Low-resourced ASR](https://aclanthology.org/W16-3714) (Chen et al., WSSANLP 2016)
ACL