@inproceedings{chen-etal-2025-metamixspeech,
title = "{M}eta{M}ix{S}peech: Meta Task Augmentation for Low-Resource Speech Recognition",
author = "Chen, Yaqi and
Zhang, Hao and
Zhang, Wenlin and
Yang, XuKui and
Qu, Dan and
Liu, Yunpeng",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.202/",
doi = "10.18653/v1/2025.findings-emnlp.202",
pages = "3769--3779",
ISBN = "979-8-89176-335-7",
abstract = "Meta-learning has proven to be a powerful paradigm for effectively improving the performance of low-resource speech recognition by learning generalizable knowledge across multiple tasks. However, multilingual meta learning also faces challenges such as task overfitting and learner overfitting, thereby reducing its ability to generalize to new tasks. To address these issues, we augment the meta-training task with ``more data'' during both training and evaluation phases. Concretely, we propose an interpolation-based task augmentation method called MetaMixSpeech, which includes both support augmentation and query augmentation. MetaMixSpeech enhances task diversity by linearly combining perturbed features from the support and query sets and performing the same linear interpolation on their corresponding losses. Experimental results on the FLEURS and Common Voice datasets demonstrate that MetaMixSpeech achieves a 6.35 {\%} improvement in Word Error Rate (WER) compared to meta-learning approaches, effectively mitigating the overfitting problem and showcasing superior generalization across diverse datasets and language families."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2025-metamixspeech">
<titleInfo>
<title>MetaMixSpeech: Meta Task Augmentation for Low-Resource Speech Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaqi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenlin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">XuKui</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Qu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunpeng</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Meta-learning has proven to be a powerful paradigm for effectively improving the performance of low-resource speech recognition by learning generalizable knowledge across multiple tasks. However, multilingual meta learning also faces challenges such as task overfitting and learner overfitting, thereby reducing its ability to generalize to new tasks. To address these issues, we augment the meta-training task with “more data” during both training and evaluation phases. Concretely, we propose an interpolation-based task augmentation method called MetaMixSpeech, which includes both support augmentation and query augmentation. MetaMixSpeech enhances task diversity by linearly combining perturbed features from the support and query sets and performing the same linear interpolation on their corresponding losses. Experimental results on the FLEURS and Common Voice datasets demonstrate that MetaMixSpeech achieves a 6.35 % improvement in Word Error Rate (WER) compared to meta-learning approaches, effectively mitigating the overfitting problem and showcasing superior generalization across diverse datasets and language families.</abstract>
<identifier type="citekey">chen-etal-2025-metamixspeech</identifier>
<identifier type="doi">10.18653/v1/2025.findings-emnlp.202</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.202/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>3769</start>
<end>3779</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MetaMixSpeech: Meta Task Augmentation for Low-Resource Speech Recognition
%A Chen, Yaqi
%A Zhang, Hao
%A Zhang, Wenlin
%A Yang, XuKui
%A Qu, Dan
%A Liu, Yunpeng
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F chen-etal-2025-metamixspeech
%X Meta-learning has proven to be a powerful paradigm for effectively improving the performance of low-resource speech recognition by learning generalizable knowledge across multiple tasks. However, multilingual meta learning also faces challenges such as task overfitting and learner overfitting, thereby reducing its ability to generalize to new tasks. To address these issues, we augment the meta-training task with “more data” during both training and evaluation phases. Concretely, we propose an interpolation-based task augmentation method called MetaMixSpeech, which includes both support augmentation and query augmentation. MetaMixSpeech enhances task diversity by linearly combining perturbed features from the support and query sets and performing the same linear interpolation on their corresponding losses. Experimental results on the FLEURS and Common Voice datasets demonstrate that MetaMixSpeech achieves a 6.35 % improvement in Word Error Rate (WER) compared to meta-learning approaches, effectively mitigating the overfitting problem and showcasing superior generalization across diverse datasets and language families.
%R 10.18653/v1/2025.findings-emnlp.202
%U https://aclanthology.org/2025.findings-emnlp.202/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.202
%P 3769-3779
Markdown (Informal)
[MetaMixSpeech: Meta Task Augmentation for Low-Resource Speech Recognition](https://aclanthology.org/2025.findings-emnlp.202/) (Chen et al., Findings 2025)
ACL