@inproceedings{storks-chai-2021-beyond-tip,
    title = "Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers",
    author = "Storks, Shane and
      Chai, Joyce",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2021",
    month = nov,
    year = "2021",
    address = "Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.findings-emnlp.272",
    doi = "10.18653/v1/2021.findings-emnlp.272",
    pages = "3169--3177",
    abstract = "As large-scale, pre-trained language models achieve human-level and superhuman accuracy on existing language understanding tasks, statistical bias in benchmark data and probing studies have recently called into question their true capabilities. For a more informative evaluation than accuracy on text classification tasks can offer, we propose evaluating systems through a novel measure of prediction coherence. We apply our framework to two existing language understanding benchmarks with different properties to demonstrate its versatility. Our experimental results show that this evaluation framework, although simple in ideas and implementation, is a quick, effective, and versatile measure to provide insight into the coherence of machines{'} predictions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="storks-chai-2021-beyond-tip">
    <titleInfo>
      <title>Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Shane</namePart>
      <namePart type="family">Storks</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joyce</namePart>
      <namePart type="family">Chai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2021</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>As large-scale, pre-trained language models achieve human-level and superhuman accuracy on existing language understanding tasks, statistical bias in benchmark data and probing studies have recently called into question their true capabilities. For a more informative evaluation than accuracy on text classification tasks can offer, we propose evaluating systems through a novel measure of prediction coherence. We apply our framework to two existing language understanding benchmarks with different properties to demonstrate its versatility. Our experimental results show that this evaluation framework, although simple in ideas and implementation, is a quick, effective, and versatile measure to provide insight into the coherence of machines’ predictions.</abstract>
    <identifier type="citekey">storks-chai-2021-beyond-tip</identifier>
    <identifier type="doi">10.18653/v1/2021.findings-emnlp.272</identifier>
    <location>
      <url>https://aclanthology.org/2021.findings-emnlp.272</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>3169</start>
        <end>3177</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers
%A Storks, Shane
%A Chai, Joyce
%S Findings of the Association for Computational Linguistics: EMNLP 2021
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic
%F storks-chai-2021-beyond-tip
%X As large-scale, pre-trained language models achieve human-level and superhuman accuracy on existing language understanding tasks, statistical bias in benchmark data and probing studies have recently called into question their true capabilities. For a more informative evaluation than accuracy on text classification tasks can offer, we propose evaluating systems through a novel measure of prediction coherence. We apply our framework to two existing language understanding benchmarks with different properties to demonstrate its versatility. Our experimental results show that this evaluation framework, although simple in ideas and implementation, is a quick, effective, and versatile measure to provide insight into the coherence of machines’ predictions.
%R 10.18653/v1/2021.findings-emnlp.272
%U https://aclanthology.org/2021.findings-emnlp.272
%U https://doi.org/10.18653/v1/2021.findings-emnlp.272
%P 3169-3177
Markdown (Informal)
[Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers](https://aclanthology.org/2021.findings-emnlp.272) (Storks & Chai, Findings 2021)
ACL
Shane Storks and Joyce Chai. 2021. Beyond the Tip of the Iceberg: Assessing Coherence of Text Classifiers. In Findings of the Association for Computational Linguistics: EMNLP 2021, pages 3169–3177, Punta Cana, Dominican Republic. Association for Computational Linguistics.