@inproceedings{huang-etal-2024-logits,
title = "Logits Reranking via Semantic Labels for Hard Samples in Text Classification",
author = "Huang, Peijie and
Huang, Junbao and
Xu, Yuhong and
Li, Weizhen and
Xiao, Xisheng",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.657",
pages = "11250--11262",
abstract = "Pre-trained Language Models (PLMs) have achieved significant success in text classification. However, they still face challenges with hard samples, which refer to instances where the model exhibits diminished confidence in distinguishing new samples. Existing research has addressed related issues, but often overlooks the semantic information inherent in the labels, treating them merely as one-hot vectors. In this paper, we propose Logits Reranking via Semantic Labels (LRSL), a model-agnostic post-processing method that leverages label semantics and auto detection of hard samples to improve classification accuracy. LRSL automatically identifies hard samples, which are then jointly processed by MLP-based and Similarity-based approaches. Applied only during inference, LRSL operates solely on classification logits, reranking them based on semantic similarities without interfering with the model{'}s training process. The experiments demonstrate the effectiveness of our method, showing significant improvements across different PLMs. Our codes are publicly available at https://github.com/SIGSDSscau/LRSL.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2024-logits">
<titleInfo>
<title>Logits Reranking via Semantic Labels for Hard Samples in Text Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Peijie</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junbao</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuhong</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weizhen</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xisheng</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pre-trained Language Models (PLMs) have achieved significant success in text classification. However, they still face challenges with hard samples, which refer to instances where the model exhibits diminished confidence in distinguishing new samples. Existing research has addressed related issues, but often overlooks the semantic information inherent in the labels, treating them merely as one-hot vectors. In this paper, we propose Logits Reranking via Semantic Labels (LRSL), a model-agnostic post-processing method that leverages label semantics and auto detection of hard samples to improve classification accuracy. LRSL automatically identifies hard samples, which are then jointly processed by MLP-based and Similarity-based approaches. Applied only during inference, LRSL operates solely on classification logits, reranking them based on semantic similarities without interfering with the model’s training process. The experiments demonstrate the effectiveness of our method, showing significant improvements across different PLMs. Our codes are publicly available at https://github.com/SIGSDSscau/LRSL.</abstract>
<identifier type="citekey">huang-etal-2024-logits</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.657</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>11250</start>
<end>11262</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Logits Reranking via Semantic Labels for Hard Samples in Text Classification
%A Huang, Peijie
%A Huang, Junbao
%A Xu, Yuhong
%A Li, Weizhen
%A Xiao, Xisheng
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F huang-etal-2024-logits
%X Pre-trained Language Models (PLMs) have achieved significant success in text classification. However, they still face challenges with hard samples, which refer to instances where the model exhibits diminished confidence in distinguishing new samples. Existing research has addressed related issues, but often overlooks the semantic information inherent in the labels, treating them merely as one-hot vectors. In this paper, we propose Logits Reranking via Semantic Labels (LRSL), a model-agnostic post-processing method that leverages label semantics and auto detection of hard samples to improve classification accuracy. LRSL automatically identifies hard samples, which are then jointly processed by MLP-based and Similarity-based approaches. Applied only during inference, LRSL operates solely on classification logits, reranking them based on semantic similarities without interfering with the model’s training process. The experiments demonstrate the effectiveness of our method, showing significant improvements across different PLMs. Our codes are publicly available at https://github.com/SIGSDSscau/LRSL.
%U https://aclanthology.org/2024.findings-emnlp.657
%P 11250-11262
Markdown (Informal)
[Logits Reranking via Semantic Labels for Hard Samples in Text Classification](https://aclanthology.org/2024.findings-emnlp.657) (Huang et al., Findings 2024)
ACL