BibTeX
@inproceedings{seth-etal-2023-learning,
    title = "Learning the Legibility of Visual Text Perturbations",
    author = "Seth, Dev  and
      Stureborg, Rickard  and
      Pruthi, Danish  and
      Dhingra, Bhuwan",
    editor = "Vlachos, Andreas  and
      Augenstein, Isabelle",
    booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
    month = may,
    year = "2023",
    address = "Dubrovnik, Croatia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.eacl-main.238",
    doi = "10.18653/v1/2023.eacl-main.238",
    pages = "3260--3273",
    abstract = "Many adversarial attacks in NLP perturb text inputs to produce visually similar strings ({`}ergo{'}, {`}εrgo{'}) which are legible to humans but degrade model performance. Although preserving legibility is a necessary condition for text perturbation, little work has been done to systematically characterize it; instead, legibility is typically loosely enforced via intuitions around the nature and extent of perturbations. Particularly, it is unclear to what extent inputs can be perturbed while preserving legibility, or how to quantify the legibility of a perturbed string. In this work, we address this gap by learning models that predict the legibility of a perturbed string, and rank candidate perturbations based on their legibility. To do so, we collect and release LEGIT, a human-annotated dataset comprising the legibility of visually perturbed text. Using this dataset, we build both text- and vision-based models which achieve up to 0.91 F1 score in predicting whether an input is legible, and an accuracy of 0.86 in predicting which of two given perturbations is more legible. Additionally, we discover that legible perturbations from the LEGIT dataset are more effective at lowering the performance of NLP models than best-known attack strategies, suggesting that current models may be vulnerable to a broad range of perturbations beyond what is captured by existing visual attacks.",
}
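
To cite the paper from a LaTeX document, the BibTeX record above can be used as-is; a minimal sketch, assuming the record is saved to a file named refs.bib (the filename and citing sentence are illustrative):

% main.tex -- compile with: pdflatex main && bibtex main && pdflatex main (run twice)
\documentclass{article}
\begin{document}
Visually perturbed text can remain legible to humans while degrading
NLP model performance \cite{seth-etal-2023-learning}.
\bibliographystyle{plain}  % any installed bibliography style works here
\bibliography{refs}        % assumes the entry above was saved as refs.bib
\end{document}
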
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="seth-etal-2023-learning">
    <titleInfo>
      <title>Learning the Legibility of Visual Text Perturbations</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Dev</namePart>
      <namePart type="family">Seth</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rickard</namePart>
      <namePart type="family">Stureborg</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Danish</namePart>
      <namePart type="family">Pruthi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bhuwan</namePart>
      <namePart type="family">Dhingra</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Andreas</namePart>
        <namePart type="family">Vlachos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Isabelle</namePart>
        <namePart type="family">Augenstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Many adversarial attacks in NLP perturb text inputs to produce visually similar strings (‘ergo’, ‘εrgo’) which are legible to humans but degrade model performance. Although preserving legibility is a necessary condition for text perturbation, little work has been done to systematically characterize it; instead, legibility is typically loosely enforced via intuitions around the nature and extent of perturbations. Particularly, it is unclear to what extent inputs can be perturbed while preserving legibility, or how to quantify the legibility of a perturbed string. In this work, we address this gap by learning models that predict the legibility of a perturbed string, and rank candidate perturbations based on their legibility. To do so, we collect and release LEGIT, a human-annotated dataset comprising the legibility of visually perturbed text. Using this dataset, we build both text- and vision-based models which achieve up to 0.91 F1 score in predicting whether an input is legible, and an accuracy of 0.86 in predicting which of two given perturbations is more legible. Additionally, we discover that legible perturbations from the LEGIT dataset are more effective at lowering the performance of NLP models than best-known attack strategies, suggesting that current models may be vulnerable to a broad range of perturbations beyond what is captured by existing visual attacks.</abstract>
    <identifier type="citekey">seth-etal-2023-learning</identifier>
    <identifier type="doi">10.18653/v1/2023.eacl-main.238</identifier>
    <location>
      <url>https://aclanthology.org/2023.eacl-main.238</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>3260</start>
        <end>3273</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Learning the Legibility of Visual Text Perturbations
%A Seth, Dev
%A Stureborg, Rickard
%A Pruthi, Danish
%A Dhingra, Bhuwan
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F seth-etal-2023-learning
%X Many adversarial attacks in NLP perturb text inputs to produce visually similar strings (‘ergo’, ‘εrgo’) which are legible to humans but degrade model performance. Although preserving legibility is a necessary condition for text perturbation, little work has been done to systematically characterize it; instead, legibility is typically loosely enforced via intuitions around the nature and extent of perturbations. Particularly, it is unclear to what extent inputs can be perturbed while preserving legibility, or how to quantify the legibility of a perturbed string. In this work, we address this gap by learning models that predict the legibility of a perturbed string, and rank candidate perturbations based on their legibility. To do so, we collect and release LEGIT, a human-annotated dataset comprising the legibility of visually perturbed text. Using this dataset, we build both text- and vision-based models which achieve up to 0.91 F1 score in predicting whether an input is legible, and an accuracy of 0.86 in predicting which of two given perturbations is more legible. Additionally, we discover that legible perturbations from the LEGIT dataset are more effective at lowering the performance of NLP models than best-known attack strategies, suggesting that current models may be vulnerable to a broad range of perturbations beyond what is captured by existing visual attacks.
%R 10.18653/v1/2023.eacl-main.238
%U https://aclanthology.org/2023.eacl-main.238
%U https://doi.org/10.18653/v1/2023.eacl-main.238
%P 3260-3273
Markdown (Informal)
[Learning the Legibility of Visual Text Perturbations](https://aclanthology.org/2023.eacl-main.238) (Seth et al., EACL 2023)
ACL
Dev Seth, Rickard Stureborg, Danish Pruthi, and Bhuwan Dhingra. 2023. Learning the Legibility of Visual Text Perturbations. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 3260–3273, Dubrovnik, Croatia. Association for Computational Linguistics.