@inproceedings{zhao-etal-2024-correcting,
title = "Correcting Language Model Bias for Text Classification in True Zero-Shot Learning",
author = "Zhao, Feng and
Xianlin, Wan and
Yan, Cheng and
Loo, Chu Kiong",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.359",
pages = "4036--4046",
abstract = "Combining pre-trained language models (PLMs) and manual templates is a common practice for text classification in zero-shot scenarios. However, the effect of this approach is highly volatile, ranging from random guesses to near state-of-the-art results, depending on the quality of the manual templates. In this paper, we show that this instability stems from the fact that language models tend toward predicting certain label words of text classification, and manual templates can influence this tendency. To address this, we develop a novel pipeline for annotating and filtering a few examples from unlabeled examples. Moreover, we propose a new method to measure model bias on label words that utilizes unlabeled examples as a validation set when tuning language models. Our approach does not require any pre-labeled examples. Experimental results on six text classification tasks demonstrate that the proposed approach significantly outperforms standard prompt learning in zero-shot settings, achieving up to 19.7{\%} absolute improvement and 13.8{\%} average improvement. More surprisingly, on IMDB and SST-2, our approach even exceeds all few-shot baselines.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2024-correcting">
<titleInfo>
<title>Correcting Language Model Bias for Text Classification in True Zero-Shot Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Feng</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wan</namePart>
<namePart type="family">Xianlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu</namePart>
<namePart type="given">Kiong</namePart>
<namePart type="family">Loo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Combining pre-trained language models (PLMs) and manual templates is a common practice for text classification in zero-shot scenarios. However, the effect of this approach is highly volatile, ranging from random guesses to near state-of-the-art results, depending on the quality of the manual templates. In this paper, we show that this instability stems from the fact that language models tend toward predicting certain label words of text classification, and manual templates can influence this tendency. To address this, we develop a novel pipeline for annotating and filtering a few examples from unlabeled examples. Moreover, we propose a new method to measure model bias on label words that utilizes unlabeled examples as a validation set when tuning language models. Our approach does not require any pre-labeled examples. Experimental results on six text classification tasks demonstrate that the proposed approach significantly outperforms standard prompt learning in zero-shot settings, achieving up to 19.7% absolute improvement and 13.8% average improvement. More surprisingly, on IMDB and SST-2, our approach even exceeds all few-shot baselines.</abstract>
<identifier type="citekey">zhao-etal-2024-correcting</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.359</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>4036</start>
<end>4046</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Correcting Language Model Bias for Text Classification in True Zero-Shot Learning
%A Zhao, Feng
%A Xianlin, Wan
%A Yan, Cheng
%A Loo, Chu Kiong
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zhao-etal-2024-correcting
%X Combining pre-trained language models (PLMs) and manual templates is a common practice for text classification in zero-shot scenarios. However, the effect of this approach is highly volatile, ranging from random guesses to near state-of-the-art results, depending on the quality of the manual templates. In this paper, we show that this instability stems from the fact that language models tend toward predicting certain label words of text classification, and manual templates can influence this tendency. To address this, we develop a novel pipeline for annotating and filtering a few examples from unlabeled examples. Moreover, we propose a new method to measure model bias on label words that utilizes unlabeled examples as a validation set when tuning language models. Our approach does not require any pre-labeled examples. Experimental results on six text classification tasks demonstrate that the proposed approach significantly outperforms standard prompt learning in zero-shot settings, achieving up to 19.7% absolute improvement and 13.8% average improvement. More surprisingly, on IMDB and SST-2, our approach even exceeds all few-shot baselines.
%U https://aclanthology.org/2024.lrec-main.359
%P 4036-4046
Markdown (Informal)
[Correcting Language Model Bias for Text Classification in True Zero-Shot Learning](https://aclanthology.org/2024.lrec-main.359) (Zhao et al., LREC-COLING 2024)
ACL