@inproceedings{qi-etal-2023-safer,
title = "{S}a{FER}: A Robust and Efficient Framework for Fine-tuning {BERT}-based Classifier with Noisy Labels",
author = "Qi, Zhenting and
Tan, Xiaoyu and
Qu, Chao and
Xu, Yinghui and
Qi, Yuan",
editor = "Sitaram, Sunayana and
Beigman Klebanov, Beata and
Williams, Jason D",
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.acl-industry.38",
doi = "10.18653/v1/2023.acl-industry.38",
pages = "390--403",
abstract = "Learning on noisy datasets is a challenging problem when pre-trained language models are applied to real-world text classification tasks. In numerous industrial applications, acquiring task-specific datasets with 100{\%} accurate labels is difficult, thus many datasets are accompanied by label noise at different levels. Previous work has shown that existing noise-handling methods could not improve the peak performance of BERT on noisy datasets, and might even deteriorate it. In this paper, we propose SaFER, a robust and efficient fine-tuning framework for BERT-based text classifiers, combating label noises without access to any clean data for training or validation. Utilizing a label-agnostic early-stopping strategy and self-supervised learning, our proposed framework achieves superior performance in terms of both accuracy and speed on multiple text classification benchmarks. The trained model is finally fully deployed in several industrial biomedical literature mining tasks and demonstrates high effectiveness and efficiency.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qi-etal-2023-safer">
<titleInfo>
<title>SaFER: A Robust and Efficient Framework for Fine-tuning BERT-based Classifier with Noisy Labels</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhenting</namePart>
<namePart type="family">Qi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoyu</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chao</namePart>
<namePart type="family">Qu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yinghui</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuan</namePart>
<namePart type="family">Qi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sunayana</namePart>
<namePart type="family">Sitaram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beata</namePart>
<namePart type="family">Beigman Klebanov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jason</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Williams</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Learning on noisy datasets is a challenging problem when pre-trained language models are applied to real-world text classification tasks. In numerous industrial applications, acquiring task-specific datasets with 100% accurate labels is difficult, thus many datasets are accompanied by label noise at different levels. Previous work has shown that existing noise-handling methods could not improve the peak performance of BERT on noisy datasets, and might even deteriorate it. In this paper, we propose SaFER, a robust and efficient fine-tuning framework for BERT-based text classifiers, combating label noises without access to any clean data for training or validation. Utilizing a label-agnostic early-stopping strategy and self-supervised learning, our proposed framework achieves superior performance in terms of both accuracy and speed on multiple text classification benchmarks. The trained model is finally fully deployed in several industrial biomedical literature mining tasks and demonstrates high effectiveness and efficiency.</abstract>
<identifier type="citekey">qi-etal-2023-safer</identifier>
<identifier type="doi">10.18653/v1/2023.acl-industry.38</identifier>
<location>
<url>https://aclanthology.org/2023.acl-industry.38</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>390</start>
<end>403</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SaFER: A Robust and Efficient Framework for Fine-tuning BERT-based Classifier with Noisy Labels
%A Qi, Zhenting
%A Tan, Xiaoyu
%A Qu, Chao
%A Xu, Yinghui
%A Qi, Yuan
%Y Sitaram, Sunayana
%Y Beigman Klebanov, Beata
%Y Williams, Jason D.
%S Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F qi-etal-2023-safer
%X Learning on noisy datasets is a challenging problem when pre-trained language models are applied to real-world text classification tasks. In numerous industrial applications, acquiring task-specific datasets with 100% accurate labels is difficult, thus many datasets are accompanied by label noise at different levels. Previous work has shown that existing noise-handling methods could not improve the peak performance of BERT on noisy datasets, and might even deteriorate it. In this paper, we propose SaFER, a robust and efficient fine-tuning framework for BERT-based text classifiers, combating label noises without access to any clean data for training or validation. Utilizing a label-agnostic early-stopping strategy and self-supervised learning, our proposed framework achieves superior performance in terms of both accuracy and speed on multiple text classification benchmarks. The trained model is finally fully deployed in several industrial biomedical literature mining tasks and demonstrates high effectiveness and efficiency.
%R 10.18653/v1/2023.acl-industry.38
%U https://aclanthology.org/2023.acl-industry.38
%U https://doi.org/10.18653/v1/2023.acl-industry.38
%P 390-403
Markdown (Informal)
[SaFER: A Robust and Efficient Framework for Fine-tuning BERT-based Classifier with Noisy Labels](https://aclanthology.org/2023.acl-industry.38) (Qi et al., ACL 2023)
ACL