% ACL Anthology record for a LoResMT 2025 workshop paper (pages 36--43).
% Proper nouns and acronyms are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
@inproceedings{gao-etal-2025-wenzhou,
  title     = {{Wenzhou} Dialect Speech to {Mandarin} Text Conversion},
  author    = {Gao, Zhipeng and
               Tamura, Akihiro and
               Kato, Tsuneo},
  editor    = {Ojha, Atul Kr. and
               Liu, Chao-hong and
               Vylomova, Ekaterina and
               Pirinen, Flammie and
               Washington, Jonathan and
               Oco, Nathaniel and
               Zhao, Xiaobing},
  booktitle = {Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages ({LoResMT} 2025)},
  month     = may,
  year      = {2025},
  address   = {Albuquerque, New Mexico, U.S.A.},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.loresmt-1.5/},
  doi       = {10.18653/v1/2025.loresmt-1.5},
  pages     = {36--43},
  isbn      = {979-8-89176-230-5},
  abstract  = {The Wenzhou dialect is a Chinese dialect that is significantly distinct from Mandarin, the official language of China. It is among the most complex Chinese dialects and is nearly incomprehensible to people from regions such as Northern China, thereby creating substantial communication barriers. Therefore, the conversion between the Wenzhou dialect and Mandarin is essential to facilitate communication between Wenzhou dialect speakers and those from other Chinese regions. However, as a low-resource language, the Wenzhou dialect lacks publicly available datasets, and such conversion technologies have not been extensively researched. Thus, in this study, we create a parallel dataset containing Wenzhou dialect speech and the corresponding Mandarin text and build benchmark models for Wenzhou dialect speech-to-Mandarin text conversion. In particular, we fine-tune two self-supervised learning-based pretrained models, that is, TeleSpeech-ASR1.0 and Wav2Vec2-XLS-R, with our training dataset and report their performance on our test dataset as baselines for future research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; the ID matches the citekey identifier below. -->
  <mods ID="gao-etal-2025-wenzhou">
    <titleInfo>
      <title>Wenzhou Dialect Speech to Mandarin Text Conversion</title>
    </titleInfo>

    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Zhipeng</namePart>
      <namePart type="family">Gao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akihiro</namePart>
      <namePart type="family">Tamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tsuneo</namePart>
      <namePart type="family">Kato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2025-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Atul</namePart>
        <namePart type="given">Kr.</namePart>
        <namePart type="family">Ojha</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chao-hong</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Vylomova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Flammie</namePart>
        <namePart type="family">Pirinen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jonathan</namePart>
        <namePart type="family">Washington</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nathaniel</namePart>
        <namePart type="family">Oco</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xiaobing</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico, U.S.A.</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-230-5</identifier>
    </relatedItem>

    <abstract>The Wenzhou dialect is a Chinese dialect that is significantly distinct from Mandarin, the official language of China. It is among the most complex Chinese dialects and is nearly incomprehensible to people from regions such as Northern China, thereby creating substantial communication barriers. Therefore, the conversion between the Wenzhou dialect and Mandarin is essential to facilitate communication between Wenzhou dialect speakers and those from other Chinese regions. However, as a low-resource language, the Wenzhou dialect lacks publicly available datasets, and such conversion technologies have not been extensively researched. Thus, in this study, we create a parallel dataset containing Wenzhou dialect speech and the corresponding Mandarin text and build benchmark models for Wenzhou dialect speech-to-Mandarin text conversion. In particular, we fine-tune two self-supervised learning-based pretrained models, that is, TeleSpeech-ASR1.0 and Wav2Vec2-XLS-R, with our training dataset and report their performance on our test dataset as baselines for future research.</abstract>

    <!-- Identifiers and landing-page URL for this paper. -->
    <identifier type="citekey">gao-etal-2025-wenzhou</identifier>
    <identifier type="doi">10.18653/v1/2025.loresmt-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2025.loresmt-1.5/</url>
    </location>

    <!-- Page range of the paper within the host volume. -->
    <part>
      <date>2025-05</date>
      <extent unit="page">
        <start>36</start>
        <end>43</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Wenzhou Dialect Speech to Mandarin Text Conversion
%A Gao, Zhipeng
%A Tamura, Akihiro
%A Kato, Tsuneo
%Y Ojha, Atul Kr.
%Y Liu, Chao-hong
%Y Vylomova, Ekaterina
%Y Pirinen, Flammie
%Y Washington, Jonathan
%Y Oco, Nathaniel
%Y Zhao, Xiaobing
%S Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico, U.S.A.
%@ 979-8-89176-230-5
%F gao-etal-2025-wenzhou
%X The Wenzhou dialect is a Chinese dialect that is significantly distinct from Mandarin, the official language of China. It is among the most complex Chinese dialects and is nearly incomprehensible to people from regions such as Northern China, thereby creating substantial communication barriers. Therefore, the conversion between the Wenzhou dialect and Mandarin is essential to facilitate communication between Wenzhou dialect speakers and those from other Chinese regions. However, as a low-resource language, the Wenzhou dialect lacks publicly available datasets, and such conversion technologies have not been extensively researched. Thus, in this study, we create a parallel dataset containing Wenzhou dialect speech and the corresponding Mandarin text and build benchmark models for Wenzhou dialect speech-to-Mandarin text conversion. In particular, we fine-tune two self-supervised learning-based pretrained models, that is, TeleSpeech-ASR1.0 and Wav2Vec2-XLS-R, with our training dataset and report their performance on our test dataset as baselines for future research.
%R 10.18653/v1/2025.loresmt-1.5
%U https://aclanthology.org/2025.loresmt-1.5/
%U https://doi.org/10.18653/v1/2025.loresmt-1.5
%P 36-43
Markdown (Informal)
[Wenzhou Dialect Speech to Mandarin Text Conversion](https://aclanthology.org/2025.loresmt-1.5/) (Gao et al., LoResMT 2025)
ACL
- Zhipeng Gao, Akihiro Tamura, and Tsuneo Kato. 2025. Wenzhou Dialect Speech to Mandarin Text Conversion. In Proceedings of the Eighth Workshop on Technologies for Machine Translation of Low-Resource Languages (LoResMT 2025), pages 36–43, Albuquerque, New Mexico, U.S.A. Association for Computational Linguistics.