@inproceedings{merdjanovska-etal-2024-noisebench,
title = "{N}oise{B}ench: Benchmarking the Impact of Real Label Noise on Named Entity Recognition",
author = "Merdjanovska, Elena and
Aynetdinov, Ansar and
Akbik, Alan",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.1011/",
doi = "10.18653/v1/2024.emnlp-main.1011",
pages = "18182--18198",
abstract = "Available training data for named entity recognition (NER) often contains a significant percentage of incorrect labels for entity types and entity boundaries. Such label noise poses challenges for supervised learning and may significantly deteriorate model quality. To address this, prior work proposed various noise-robust learning approaches capable of learning from data with partially incorrect labels. These approaches are typically evaluated using simulated noise where the labels in a clean dataset are automatically corrupted. However, as we show in this paper, this leads to unrealistic noise that is far easier to handle than real noise caused by human error or semi-automatic annotation. To enable the study of the impact of various types of real noise, we introduce NoiseBench, an NER benchmark consisting of clean training data corrupted with 6 types of real noise, including expert errors, crowdsourcing errors, automatic annotation errors and LLM errors. We present an analysis that shows that real noise is significantly more challenging than simulated noise, and show that current state-of-the-art models for noise-robust learning fall far short of their achievable upper bound. We release NoiseBench for both English and German to the research community."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="merdjanovska-etal-2024-noisebench">
    <titleInfo>
      <title>NoiseBench: Benchmarking the Impact of Real Label Noise on Named Entity Recognition</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Elena</namePart>
      <namePart type="family">Merdjanovska</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ansar</namePart>
      <namePart type="family">Aynetdinov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alan</namePart>
      <namePart type="family">Akbik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yaser</namePart>
        <namePart type="family">Al-Onaizan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Bansal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yun-Nung</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Miami, Florida, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Available training data for named entity recognition (NER) often contains a significant percentage of incorrect labels for entity types and entity boundaries. Such label noise poses challenges for supervised learning and may significantly deteriorate model quality. To address this, prior work proposed various noise-robust learning approaches capable of learning from data with partially incorrect labels. These approaches are typically evaluated using simulated noise where the labels in a clean dataset are automatically corrupted. However, as we show in this paper, this leads to unrealistic noise that is far easier to handle than real noise caused by human error or semi-automatic annotation. To enable the study of the impact of various types of real noise, we introduce NoiseBench, an NER benchmark consisting of clean training data corrupted with 6 types of real noise, including expert errors, crowdsourcing errors, automatic annotation errors and LLM errors. We present an analysis that shows that real noise is significantly more challenging than simulated noise, and show that current state-of-the-art models for noise-robust learning fall far short of their achievable upper bound. We release NoiseBench for both English and German to the research community.</abstract>
    <identifier type="citekey">merdjanovska-etal-2024-noisebench</identifier>
    <identifier type="doi">10.18653/v1/2024.emnlp-main.1011</identifier>
    <location>
      <url>https://aclanthology.org/2024.emnlp-main.1011/</url>
    </location>
    <part>
      <date>2024-11</date>
      <extent unit="page">
        <start>18182</start>
        <end>18198</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NoiseBench: Benchmarking the Impact of Real Label Noise on Named Entity Recognition
%A Merdjanovska, Elena
%A Aynetdinov, Ansar
%A Akbik, Alan
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F merdjanovska-etal-2024-noisebench
%X Available training data for named entity recognition (NER) often contains a significant percentage of incorrect labels for entity types and entity boundaries. Such label noise poses challenges for supervised learning and may significantly deteriorate model quality. To address this, prior work proposed various noise-robust learning approaches capable of learning from data with partially incorrect labels. These approaches are typically evaluated using simulated noise where the labels in a clean dataset are automatically corrupted. However, as we show in this paper, this leads to unrealistic noise that is far easier to handle than real noise caused by human error or semi-automatic annotation. To enable the study of the impact of various types of real noise, we introduce NoiseBench, an NER benchmark consisting of clean training data corrupted with 6 types of real noise, including expert errors, crowdsourcing errors, automatic annotation errors and LLM errors. We present an analysis that shows that real noise is significantly more challenging than simulated noise, and show that current state-of-the-art models for noise-robust learning fall far short of their achievable upper bound. We release NoiseBench for both English and German to the research community.
%R 10.18653/v1/2024.emnlp-main.1011
%U https://aclanthology.org/2024.emnlp-main.1011/
%U https://doi.org/10.18653/v1/2024.emnlp-main.1011
%P 18182-18198
Markdown (Informal)
[NoiseBench: Benchmarking the Impact of Real Label Noise on Named Entity Recognition](https://aclanthology.org/2024.emnlp-main.1011/) (Merdjanovska et al., EMNLP 2024)
ACL
Elena Merdjanovska, Ansar Aynetdinov, and Alan Akbik. 2024. [NoiseBench: Benchmarking the Impact of Real Label Noise on Named Entity Recognition](https://aclanthology.org/2024.emnlp-main.1011/). In *Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing*, pages 18182–18198, Miami, Florida, USA. Association for Computational Linguistics.