@inproceedings{harbecke-etal-2022-micro,
title = "Why only Micro-F1? Class Weighting of Measures for Relation Classification",
author = "Harbecke, David and
Chen, Yuxuan and
Hennig, Leonhard and
Alt, Christoph",
booktitle = "Proceedings of NLP Power! The First Workshop on Efficient Benchmarking in NLP",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nlppower-1.4",
doi = "10.18653/v1/2022.nlppower-1.4",
pages = "32--41",
abstract = "Relation classification models are conventionally evaluated using only a single measure, e.g., micro-F1, macro-F1 or AUC. In this work, we analyze weighting schemes, such as micro and macro, for imbalanced datasets. We introduce a framework for weighting schemes, where existing schemes are extremes, and two new intermediate schemes. We show that reporting results of different weighting schemes better highlights strengths and weaknesses of a model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="harbecke-etal-2022-micro">
<titleInfo>
<title>Why only Micro-F1? Class Weighting of Measures for Relation Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Harbecke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuxuan</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonhard</namePart>
<namePart type="family">Hennig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Alt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of NLP Power! The First Workshop on Efficient Benchmarking in NLP</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Relation classification models are conventionally evaluated using only a single measure, e.g., micro-F1, macro-F1 or AUC. In this work, we analyze weighting schemes, such as micro and macro, for imbalanced datasets. We introduce a framework for weighting schemes, where existing schemes are extremes, and two new intermediate schemes. We show that reporting results of different weighting schemes better highlights strengths and weaknesses of a model.</abstract>
<identifier type="citekey">harbecke-etal-2022-micro</identifier>
<identifier type="doi">10.18653/v1/2022.nlppower-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.nlppower-1.4</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>32</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Why only Micro-F1? Class Weighting of Measures for Relation Classification
%A Harbecke, David
%A Chen, Yuxuan
%A Hennig, Leonhard
%A Alt, Christoph
%S Proceedings of NLP Power! The First Workshop on Efficient Benchmarking in NLP
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F harbecke-etal-2022-micro
%X Relation classification models are conventionally evaluated using only a single measure, e.g., micro-F1, macro-F1 or AUC. In this work, we analyze weighting schemes, such as micro and macro, for imbalanced datasets. We introduce a framework for weighting schemes, where existing schemes are extremes, and two new intermediate schemes. We show that reporting results of different weighting schemes better highlights strengths and weaknesses of a model.
%R 10.18653/v1/2022.nlppower-1.4
%U https://aclanthology.org/2022.nlppower-1.4
%U https://doi.org/10.18653/v1/2022.nlppower-1.4
%P 32-41
Markdown (Informal)
[Why only Micro-F1? Class Weighting of Measures for Relation Classification](https://aclanthology.org/2022.nlppower-1.4) (Harbecke et al., nlppower 2022)
ACL