@inproceedings{lugo-vielzeuf-2024-towards,
    title = "Towards efficient self-supervised representation learning in speech processing",
    author = "Lugo, Luis  and
      Vielzeuf, Valentin",
    editor = "Graham, Yvette  and
      Purver, Matthew",
    booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
    month = mar,
    year = "2024",
    address = "St. Julian{'}s, Malta",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-eacl.23",
    pages = "340--346",
    abstract = "Self-supervised learning has achieved impressive results in speech processing, but current models are computationally expensive, generating environmental concerns because of their high energy consumption. Therefore, we propose an efficient self-supervised approach to address high computational costs, using a single GPU during 24 to 48 hours of pretraining. The proposed approach combines linear, convolutional, and self-attention layers with several optimizations, including dynamic batching, flash attention, mixed-precision training, gradient accumulation, and acoustic feature extraction with input preprocessing. Computational cost estimations for our proposed model represent up to two orders of magnitude improvements in computational efficiency against existing speech models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lugo-vielzeuf-2024-towards">
    <titleInfo>
        <title>Towards efficient self-supervised representation learning in speech processing</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Lugo</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Valentin</namePart>
        <namePart type="family">Vielzeuf</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2024-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Findings of the Association for Computational Linguistics: EACL 2024</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Yvette</namePart>
            <namePart type="family">Graham</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Matthew</namePart>
            <namePart type="family">Purver</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">St. Julian’s, Malta</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Self-supervised learning has achieved impressive results in speech processing, but current models are computationally expensive, generating environmental concerns because of their high energy consumption. Therefore, we propose an efficient self-supervised approach to address high computational costs, using a single GPU during 24 to 48 hours of pretraining. The proposed approach combines linear, convolutional, and self-attention layers with several optimizations, including dynamic batching, flash attention, mixed-precision training, gradient accumulation, and acoustic feature extraction with input preprocessing. Computational cost estimations for our proposed model represent up to two orders of magnitude improvements in computational efficiency against existing speech models.</abstract>
    <identifier type="citekey">lugo-vielzeuf-2024-towards</identifier>
    <location>
        <url>https://aclanthology.org/2024.findings-eacl.23</url>
    </location>
    <part>
        <date>2024-03</date>
        <extent unit="page">
            <start>340</start>
            <end>346</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards efficient self-supervised representation learning in speech processing
%A Lugo, Luis
%A Vielzeuf, Valentin
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F lugo-vielzeuf-2024-towards
%X Self-supervised learning has achieved impressive results in speech processing, but current models are computationally expensive, generating environmental concerns because of their high energy consumption. Therefore, we propose an efficient self-supervised approach to address high computational costs, using a single GPU during 24 to 48 hours of pretraining. The proposed approach combines linear, convolutional, and self-attention layers with several optimizations, including dynamic batching, flash attention, mixed-precision training, gradient accumulation, and acoustic feature extraction with input preprocessing. Computational cost estimations for our proposed model represent up to two orders of magnitude improvements in computational efficiency against existing speech models.
%U https://aclanthology.org/2024.findings-eacl.23
%P 340-346
Markdown (Informal)
[Towards efficient self-supervised representation learning in speech processing](https://aclanthology.org/2024.findings-eacl.23) (Lugo & Vielzeuf, Findings 2024)
ACL
Luis Lugo and Valentin Vielzeuf. 2024. Towards efficient self-supervised representation learning in speech processing. In Findings of the Association for Computational Linguistics: EACL 2024, pages 340–346, St. Julian’s, Malta. Association for Computational Linguistics.