@inproceedings{ma-etal-2023-zao,
title = "噪声鲁棒的蒙古语语音数据增广模型结构(Noise robust {M}ongolian speech data augmentation model structure)",
author = "Ma, Zhiqaing and
Sun, Jiaqi and
Li, Jinyi and
Wang, Jiatai",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.14",
pages = "155--165",
abstract = "{``}蒙古语语料库中语音多样性匮乏,虽然花费人力和经费收集数据在一定程度上能够增加语音的数量,但整个过程需要耗费大量的时间。数据增广能够解决这种数据匮乏问题,但数据增广模型的训练数据包含的环境噪声无法控制,导致增广语音中存在背景噪声。本文提出一种TTS和语音增强相结合的语音数据增广方法,以语音的频谱图为基础,从频域和时域两个维度进行语音增强。通过多组实验证明,蒙古语增广语音的合格率达到70{\%},增广语音的CBAK和COVL分别下降了0.66和0.81,WER和SER下降了2.75{\%}和2.05{\%}。{''}",
language = "Chinese",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ma-etal-2023-zao">
<titleInfo>
<title>噪声鲁棒的蒙古语语音数据增广模型结构(Noise robust Mongolian speech data augmentation model structure)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhiqaing</namePart>
<namePart type="family">Ma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaqi</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinyi</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiatai</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">Chinese</languageTerm>
<languageTerm type="code" authority="iso639-2b">chi</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“蒙古语语料库中语音多样性匮乏,虽然花费人力和经费收集数据在一定程度上能够增加语音的数量,但整个过程需要耗费大量的时间。数据增广能够解决这种数据匮乏问题,但数据增广模型的训练数据包含的环境噪声无法控制,导致增广语音中存在背景噪声。本文提出一种TTS和语音增强相结合的语音数据增广方法,以语音的频谱图为基础,从频域和时域两个维度进行语音增强。通过多组实验证明,蒙古语增广语音的合格率达到70%,增广语音的CBAK和COVL分别下降了0.66和0.81,WER和SER下降了2.75%和2.05%。”</abstract>
<identifier type="citekey">ma-etal-2023-zao</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.14</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>155</start>
<end>165</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T 噪声鲁棒的蒙古语语音数据增广模型结构(Noise robust Mongolian speech data augmentation model structure)
%A Ma, Zhiqaing
%A Sun, Jiaqi
%A Li, Jinyi
%A Wang, Jiatai
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G Chinese
%F ma-etal-2023-zao
%X “蒙古语语料库中语音多样性匮乏,虽然花费人力和经费收集数据在一定程度上能够增加语音的数量,但整个过程需要耗费大量的时间。数据增广能够解决这种数据匮乏问题,但数据增广模型的训练数据包含的环境噪声无法控制,导致增广语音中存在背景噪声。本文提出一种TTS和语音增强相结合的语音数据增广方法,以语音的频谱图为基础,从频域和时域两个维度进行语音增强。通过多组实验证明,蒙古语增广语音的合格率达到70%,增广语音的CBAK和COVL分别下降了0.66和0.81,WER和SER下降了2.75%和2.05%。”
%U https://aclanthology.org/2023.ccl-1.14
%P 155-165
Markdown (Informal)
[噪声鲁棒的蒙古语语音数据增广模型结构(Noise robust Mongolian speech data augmentation model structure)](https://aclanthology.org/2023.ccl-1.14) (Ma et al., CCL 2023)
ACL