@inproceedings{bast-etal-2023-fast,
title = "Fast Whitespace Correction with Encoder-Only Transformers",
author = "Bast, Hannah and
Hertel, Matthias and
Walter, Sebastian",
editor = "Bollegala, Danushka and
Huang, Ruihong and
Ritter, Alan",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-demo.37",
doi = "10.18653/v1/2023.acl-demo.37",
pages = "389--399",
abstract = "The goal of whitespace correction is to fix space errors in arbitrary given text. For example, given the text {``}whi te space correctio nwithTransf or mers{''}, produce {``}whitespace correction with Transformers{''}. We compare two Transformer-based models, a character-level encoder-decoder model and a byte-level encoder-only model. We find that the encoder-only model is both faster and achieves higher quality. We provide an easy-to-use tool that is over 900 times faster than the previous best tool, with the same high quality. Our tool repairs text at a rate of over 200 kB/s on GPU, with a sequence-averaged F1-score ranging from 87.5{\%} for hard-to-correct text up to 99{\%} for text without any spaces.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bast-etal-2023-fast">
<titleInfo>
<title>Fast Whitespace Correction with Encoder-Only Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="family">Bast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Hertel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Walter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danushka</namePart>
<namePart type="family">Bollegala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruihong</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The goal of whitespace correction is to fix space errors in arbitrary given text. For example, given the text “whi te space correctio nwithTransf or mers”, produce “whitespace correction with Transformers”. We compare two Transformer-based models, a character-level encoder-decoder model and a byte-level encoder-only model. We find that the encoder-only model is both faster and achieves higher quality. We provide an easy-to-use tool that is over 900 times faster than the previous best tool, with the same high quality. Our tool repairs text at a rate of over 200 kB/s on GPU, with a sequence-averaged F1-score ranging from 87.5% for hard-to-correct text up to 99% for text without any spaces.</abstract>
<identifier type="citekey">bast-etal-2023-fast</identifier>
<identifier type="doi">10.18653/v1/2023.acl-demo.37</identifier>
<location>
<url>https://aclanthology.org/2023.acl-demo.37</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>389</start>
<end>399</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fast Whitespace Correction with Encoder-Only Transformers
%A Bast, Hannah
%A Hertel, Matthias
%A Walter, Sebastian
%Y Bollegala, Danushka
%Y Huang, Ruihong
%Y Ritter, Alan
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F bast-etal-2023-fast
%X The goal of whitespace correction is to fix space errors in arbitrary given text. For example, given the text “whi te space correctio nwithTransf or mers”, produce “whitespace correction with Transformers”. We compare two Transformer-based models, a character-level encoder-decoder model and a byte-level encoder-only model. We find that the encoder-only model is both faster and achieves higher quality. We provide an easy-to-use tool that is over 900 times faster than the previous best tool, with the same high quality. Our tool repairs text at a rate of over 200 kB/s on GPU, with a sequence-averaged F1-score ranging from 87.5% for hard-to-correct text up to 99% for text without any spaces.
%R 10.18653/v1/2023.acl-demo.37
%U https://aclanthology.org/2023.acl-demo.37
%U https://doi.org/10.18653/v1/2023.acl-demo.37
%P 389-399
Markdown (Informal)
[Fast Whitespace Correction with Encoder-Only Transformers](https://aclanthology.org/2023.acl-demo.37) (Bast et al., ACL 2023)
ACL
- Hannah Bast, Matthias Hertel, and Sebastian Walter. 2023. Fast Whitespace Correction with Encoder-Only Transformers. In Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations), pages 389–399, Toronto, Canada. Association for Computational Linguistics.