@inproceedings{kim-lee-2024-sparseflow,
title = "{S}parse{F}low: Accelerating Transformers by Sparsifying Information Flows",
author = "Kim, Yeachan and
Lee, SangKeun",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-long.323/",
doi = "10.18653/v1/2024.acl-long.323",
pages = "5937--5948",
abstract = "Transformers have become the de-facto standard for natural language processing. However, dense information flows within transformers pose significant challenges for real-time and resource-constrained devices, as computational complexity grows quadratically with sequence length. To counteract such dense information flows, we propose SparseFlow, a novel efficient method designed to sparsify the dense pathways of token representations across all transformer blocks. To this end, SparseFlow parameterizes the information flows linking token representations to transformer blocks. These parameterized information flows are optimized to be sparse, allowing only the salient information to pass through into the blocks. To validate the efficacy of SparseFlow, we conduct comprehensive experiments across diverse benchmarks (understanding and generation), scales (ranging from millions to billions), architectures (including encoders, decoders, and seq-to-seq models), and modalities (such as language-only and vision-language). The results convincingly demonstrate that sparsifying the dense information flows leads to substantial speedup gains without compromising task accuracy. For instance, SparseFlow reduces computational costs by half on average, without a significant loss in accuracy."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kim-lee-2024-sparseflow">
<titleInfo>
<title>SparseFlow: Accelerating Transformers by Sparsifying Information Flows</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yeachan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">SangKeun</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Transformers have become the de-facto standard for natural language processing. However, dense information flows within transformers pose significant challenges for real-time and resource-constrained devices, as computational complexity grows quadratically with sequence length. To counteract such dense information flows, we propose SparseFlow, a novel efficient method designed to sparsify the dense pathways of token representations across all transformer blocks. To this end, SparseFlow parameterizes the information flows linking token representations to transformer blocks. These parameterized information flows are optimized to be sparse, allowing only the salient information to pass through into the blocks. To validate the efficacy of SparseFlow, we conduct comprehensive experiments across diverse benchmarks (understanding and generation), scales (ranging from millions to billions), architectures (including encoders, decoders, and seq-to-seq models), and modalities (such as language-only and vision-language). The results convincingly demonstrate that sparsifying the dense information flows leads to substantial speedup gains without compromising task accuracy. For instance, SparseFlow reduces computational costs by half on average, without a significant loss in accuracy.</abstract>
<identifier type="citekey">kim-lee-2024-sparseflow</identifier>
<identifier type="doi">10.18653/v1/2024.acl-long.323</identifier>
<location>
<url>https://aclanthology.org/2024.luhme-long.323/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>5937</start>
<end>5948</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SparseFlow: Accelerating Transformers by Sparsifying Information Flows
%A Kim, Yeachan
%A Lee, SangKeun
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F kim-lee-2024-sparseflow
%X Transformers have become the de-facto standard for natural language processing. However, dense information flows within transformers pose significant challenges for real-time and resource-constrained devices, as computational complexity grows quadratically with sequence length. To counteract such dense information flows, we propose SparseFlow, a novel efficient method designed to sparsify the dense pathways of token representations across all transformer blocks. To this end, SparseFlow parameterizes the information flows linking token representations to transformer blocks. These parameterized information flows are optimized to be sparse, allowing only the salient information to pass through into the blocks. To validate the efficacy of SparseFlow, we conduct comprehensive experiments across diverse benchmarks (understanding and generation), scales (ranging from millions to billions), architectures (including encoders, decoders, and seq-to-seq models), and modalities (such as language-only and vision-language). The results convincingly demonstrate that sparsifying the dense information flows leads to substantial speedup gains without compromising task accuracy. For instance, SparseFlow reduces computational costs by half on average, without a significant loss in accuracy.
%R 10.18653/v1/2024.acl-long.323
%U https://aclanthology.org/2024.luhme-long.323/
%U https://doi.org/10.18653/v1/2024.acl-long.323
%P 5937-5948
Markdown (Informal)
[SparseFlow: Accelerating Transformers by Sparsifying Information Flows](https://aclanthology.org/2024.luhme-long.323/) (Kim & Lee, ACL 2024)
ACL