@inproceedings{liusie-etal-2022-analyzing,
title = "Analyzing Biases to Spurious Correlations in Text Classification Tasks",
author = "Liusie, Adian and
Raina, Vatsal and
Raina, Vyas and
Gales, Mark",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-short.11",
pages = "78--84",
abstract = "Machine learning systems have shown impressive performance across a range of natural language tasks. However, it has been hypothesized that these systems are prone to learning spurious correlations that may be present in the training data. Though these correlations will not impact in-domain performance, they are unlikely to generalize well to out-of-domain data, limiting the applicability of systems. This work examines this phenomenon on text classification tasks. Rather than artificially injecting features into the data, we demonstrate that real spurious correlations can be exploited by current state-of-the-art deep-learning systems. Specifically, we show that even when only {`}stop{'} words are available at the input stage, it is possible to predict the class significantly better than random. Though it is shown that these stop words are not required for good in-domain performance, they can degrade the ability of the system to generalize well to out-of-domain data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liusie-etal-2022-analyzing">
<titleInfo>
<title>Analyzing Biases to Spurious Correlations in Text Classification Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adian</namePart>
<namePart type="family">Liusie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vatsal</namePart>
<namePart type="family">Raina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vyas</namePart>
<namePart type="family">Raina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Gales</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine learning systems have shown impressive performance across a range of natural language tasks. However, it has been hypothesized that these systems are prone to learning spurious correlations that may be present in the training data. Though these correlations will not impact in-domain performance, they are unlikely to generalize well to out-of-domain data, limiting the applicability of systems. This work examines this phenomenon on text classification tasks. Rather than artificially injecting features into the data, we demonstrate that real spurious correlations can be exploited by current state-of-the-art deep-learning systems. Specifically, we show that even when only ‘stop’ words are available at the input stage, it is possible to predict the class significantly better than random. Though it is shown that these stop words are not required for good in-domain performance, they can degrade the ability of the system to generalize well to out-of-domain data.</abstract>
<identifier type="citekey">liusie-etal-2022-analyzing</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-short.11</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>78</start>
<end>84</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analyzing Biases to Spurious Correlations in Text Classification Tasks
%A Liusie, Adian
%A Raina, Vatsal
%A Raina, Vyas
%A Gales, Mark
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F liusie-etal-2022-analyzing
%X Machine learning systems have shown impressive performance across a range of natural language tasks. However, it has been hypothesized that these systems are prone to learning spurious correlations that may be present in the training data. Though these correlations will not impact in-domain performance, they are unlikely to generalize well to out-of-domain data, limiting the applicability of systems. This work examines this phenomenon on text classification tasks. Rather than artificially injecting features into the data, we demonstrate that real spurious correlations can be exploited by current state-of-the-art deep-learning systems. Specifically, we show that even when only ‘stop’ words are available at the input stage, it is possible to predict the class significantly better than random. Though it is shown that these stop words are not required for good in-domain performance, they can degrade the ability of the system to generalize well to out-of-domain data.
%U https://aclanthology.org/2022.aacl-short.11
%P 78-84
Markdown (Informal)
[Analyzing Biases to Spurious Correlations in Text Classification Tasks](https://aclanthology.org/2022.aacl-short.11) (Liusie et al., AACL-IJCNLP 2022)
ACL
- Adian Liusie, Vatsal Raina, Vyas Raina, and Mark Gales. 2022. Analyzing Biases to Spurious Correlations in Text Classification Tasks. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 78–84, Online only. Association for Computational Linguistics.