@inproceedings{araujo-etal-2021-stress,
title = "Stress Test Evaluation of Biomedical Word Embeddings",
author = "Araujo, Vladimir and
Carvallo, Andr{\'e}s and
Aspillaga, Carlos and
Thorne, Camilo and
Parra, Denis",
editor = "Demner-Fushman, Dina and
Cohen, Kevin Bretonnel and
Ananiadou, Sophia and
Tsujii, Junichi",
booktitle = "Proceedings of the 20th Workshop on Biomedical Language Processing",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.bionlp-1.13",
doi = "10.18653/v1/2021.bionlp-1.13",
pages = "119--125",
abstract = "The success of pretrained word embeddings has motivated their use in the biomedical domain, with contextualized embeddings yielding remarkable results in several biomedical NLP tasks. However, there is a lack of research on quantifying their behavior under severe {``}stress{''} scenarios. In this work, we systematically evaluate three language models with adversarial examples {--} automatically constructed tests that allow us to examine how robust the models are. We propose two types of stress scenarios focused on the biomedical named entity recognition (NER) task, one inspired by spelling errors and another based on the use of synonyms for medical terms. Our experiments with three benchmarks show that the performance of the original models decreases considerably, in addition to revealing their weaknesses and strengths. Finally, we show that adversarial training causes the models to improve their robustness and even to exceed the original performance in some cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="araujo-etal-2021-stress">
<titleInfo>
<title>Stress Test Evaluation of Biomedical Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="family">Araujo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrés</namePart>
<namePart type="family">Carvallo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Aspillaga</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Camilo</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Denis</namePart>
<namePart type="family">Parra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Workshop on Biomedical Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The success of pretrained word embeddings has motivated their use in the biomedical domain, with contextualized embeddings yielding remarkable results in several biomedical NLP tasks. However, there is a lack of research on quantifying their behavior under severe “stress” scenarios. In this work, we systematically evaluate three language models with adversarial examples – automatically constructed tests that allow us to examine how robust the models are. We propose two types of stress scenarios focused on the biomedical named entity recognition (NER) task, one inspired by spelling errors and another based on the use of synonyms for medical terms. Our experiments with three benchmarks show that the performance of the original models decreases considerably, in addition to revealing their weaknesses and strengths. Finally, we show that adversarial training causes the models to improve their robustness and even to exceed the original performance in some cases.</abstract>
<identifier type="citekey">araujo-etal-2021-stress</identifier>
<identifier type="doi">10.18653/v1/2021.bionlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2021.bionlp-1.13</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>119</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Stress Test Evaluation of Biomedical Word Embeddings
%A Araujo, Vladimir
%A Carvallo, Andrés
%A Aspillaga, Carlos
%A Thorne, Camilo
%A Parra, Denis
%Y Demner-Fushman, Dina
%Y Cohen, Kevin Bretonnel
%Y Ananiadou, Sophia
%Y Tsujii, Junichi
%S Proceedings of the 20th Workshop on Biomedical Language Processing
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F araujo-etal-2021-stress
%X The success of pretrained word embeddings has motivated their use in the biomedical domain, with contextualized embeddings yielding remarkable results in several biomedical NLP tasks. However, there is a lack of research on quantifying their behavior under severe “stress” scenarios. In this work, we systematically evaluate three language models with adversarial examples – automatically constructed tests that allow us to examine how robust the models are. We propose two types of stress scenarios focused on the biomedical named entity recognition (NER) task, one inspired by spelling errors and another based on the use of synonyms for medical terms. Our experiments with three benchmarks show that the performance of the original models decreases considerably, in addition to revealing their weaknesses and strengths. Finally, we show that adversarial training causes the models to improve their robustness and even to exceed the original performance in some cases.
%R 10.18653/v1/2021.bionlp-1.13
%U https://aclanthology.org/2021.bionlp-1.13
%U https://doi.org/10.18653/v1/2021.bionlp-1.13
%P 119-125
Markdown (Informal)
[Stress Test Evaluation of Biomedical Word Embeddings](https://aclanthology.org/2021.bionlp-1.13) (Araujo et al., BioNLP 2021)
ACL
- Vladimir Araujo, Andrés Carvallo, Carlos Aspillaga, Camilo Thorne, and Denis Parra. 2021. Stress Test Evaluation of Biomedical Word Embeddings. In Proceedings of the 20th Workshop on Biomedical Language Processing, pages 119–125, Online. Association for Computational Linguistics.