@inproceedings{amiri-etal-2018-spotting,
title = "Spotting Spurious Data with Neural Networks",
author = "Amiri, Hadi and
Miller, Timothy and
Savova, Guergana",
editor = "Walker, Marilyn and
Ji, Heng and
Stent, Amanda",
booktitle = "Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)",
month = jun,
year = "2018",
address = "New Orleans, Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N18-1182",
doi = "10.18653/v1/N18-1182",
pages = "2006--2016",
abstract = "Automatic identification of spurious instances (those with potentially wrong labels in datasets) can improve the quality of existing language resources, especially when annotations are obtained through crowdsourcing or automatically generated based on coded rankings. In this paper, we present effective approaches inspired by queueing theory and psychology of learning to automatically identify spurious instances in datasets. Our approaches discriminate instances based on their {``}difficulty to learn,{''} determined by a downstream learner. Our methods can be applied to any dataset assuming the existence of a neural network model for the target task of the dataset. Our best approach outperforms competing state-of-the-art baselines and has a MAP of 0.85 and 0.22 in identifying spurious instances in synthetic and carefully-crowdsourced real-world datasets respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="amiri-etal-2018-spotting">
<titleInfo>
<title>Spotting Spurious Data with Neural Networks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hadi</namePart>
<namePart type="family">Amiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Miller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guergana</namePart>
<namePart type="family">Savova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marilyn</namePart>
<namePart type="family">Walker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic identification of spurious instances (those with potentially wrong labels in datasets) can improve the quality of existing language resources, especially when annotations are obtained through crowdsourcing or automatically generated based on coded rankings. In this paper, we present effective approaches inspired by queueing theory and psychology of learning to automatically identify spurious instances in datasets. Our approaches discriminate instances based on their “difficulty to learn,” determined by a downstream learner. Our methods can be applied to any dataset assuming the existence of a neural network model for the target task of the dataset. Our best approach outperforms competing state-of-the-art baselines and has a MAP of 0.85 and 0.22 in identifying spurious instances in synthetic and carefully-crowdsourced real-world datasets respectively.</abstract>
<identifier type="citekey">amiri-etal-2018-spotting</identifier>
<identifier type="doi">10.18653/v1/N18-1182</identifier>
<location>
<url>https://aclanthology.org/N18-1182</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>2006</start>
<end>2016</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Spotting Spurious Data with Neural Networks
%A Amiri, Hadi
%A Miller, Timothy
%A Savova, Guergana
%Y Walker, Marilyn
%Y Ji, Heng
%Y Stent, Amanda
%S Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F amiri-etal-2018-spotting
%X Automatic identification of spurious instances (those with potentially wrong labels in datasets) can improve the quality of existing language resources, especially when annotations are obtained through crowdsourcing or automatically generated based on coded rankings. In this paper, we present effective approaches inspired by queueing theory and psychology of learning to automatically identify spurious instances in datasets. Our approaches discriminate instances based on their “difficulty to learn,” determined by a downstream learner. Our methods can be applied to any dataset assuming the existence of a neural network model for the target task of the dataset. Our best approach outperforms competing state-of-the-art baselines and has a MAP of 0.85 and 0.22 in identifying spurious instances in synthetic and carefully-crowdsourced real-world datasets respectively.
%R 10.18653/v1/N18-1182
%U https://aclanthology.org/N18-1182
%U https://doi.org/10.18653/v1/N18-1182
%P 2006-2016
Markdown (Informal)
[Spotting Spurious Data with Neural Networks](https://aclanthology.org/N18-1182) (Amiri et al., NAACL 2018)
ACL
- Hadi Amiri, Timothy Miller, and Guergana Savova. 2018. Spotting Spurious Data with Neural Networks. In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), pages 2006–2016, New Orleans, Louisiana. Association for Computational Linguistics.