% Conference paper in the *SEM 2019 proceedings (ACL Anthology ID S19-1020).
% SEM is brace-protected in booktitle so that styles keep its capitalisation.
@inproceedings{lakshmi-narayan-etal-2019-exploration,
  title     = {Exploration of Noise Strategies in Semi-supervised Named Entity Classification},
  author    = {Lakshmi Narayan, Pooja and
               Nagesh, Ajay and
               Surdeanu, Mihai},
  editor    = {Mihalcea, Rada and
               Shutova, Ekaterina and
               Ku, Lun-Wei and
               Evang, Kilian and
               Poria, Soujanya},
  booktitle = {Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*{SEM} 2019)},
  month     = jun,
  year      = {2019},
  address   = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/S19-1020},
  doi       = {10.18653/v1/S19-1020},
  pages     = {186--191},
  abstract  = {Noise is inherent in real world datasets and modeling noise is critical during training as it is effective in regularization. Recently, novel semi-supervised deep learning techniques have demonstrated tremendous potential when learning with very limited labeled training data in image processing tasks. A critical aspect of these semi-supervised learning techniques is augmenting the input or the network with noise to be able to learn robust models. While modeling noise is relatively straightforward in continuous domains such as image classification, it is not immediately apparent how noise can be modeled in discrete domains such as language. Our work aims to address this gap by exploring different noise strategies for the semi-supervised named entity classification task, including statistical methods such as adding Gaussian noise to input embeddings, and linguistically-inspired ones such as dropping words and replacing words with their synonyms. We compare their performance on two benchmark datasets (OntoNotes and CoNLL) for named entity classification. Our results indicate that noise strategies that are linguistically informed perform at least as well as statistical approaches, while being simpler and requiring minimal tuning.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey lakshmi-narayan-etal-2019-exploration. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="lakshmi-narayan-etal-2019-exploration">
    <titleInfo>
      <title>Exploration of Noise Strategies in Semi-supervised Named Entity Classification</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Pooja</namePart>
      <namePart type="family">Lakshmi Narayan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ajay</namePart>
      <namePart type="family">Nagesh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mihai</namePart>
      <namePart type="family">Surdeanu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM 2019)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Rada</namePart>
        <namePart type="family">Mihalcea</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lun-Wei</namePart>
        <namePart type="family">Ku</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kilian</namePart>
        <namePart type="family">Evang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Soujanya</namePart>
        <namePart type="family">Poria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Minneapolis, Minnesota</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Noise is inherent in real world datasets and modeling noise is critical during training as it is effective in regularization. Recently, novel semi-supervised deep learning techniques have demonstrated tremendous potential when learning with very limited labeled training data in image processing tasks. A critical aspect of these semi-supervised learning techniques is augmenting the input or the network with noise to be able to learn robust models. While modeling noise is relatively straightforward in continuous domains such as image classification, it is not immediately apparent how noise can be modeled in discrete domains such as language. Our work aims to address this gap by exploring different noise strategies for the semi-supervised named entity classification task, including statistical methods such as adding Gaussian noise to input embeddings, and linguistically-inspired ones such as dropping words and replacing words with their synonyms. We compare their performance on two benchmark datasets (OntoNotes and CoNLL) for named entity classification. Our results indicate that noise strategies that are linguistically informed perform at least as well as statistical approaches, while being simpler and requiring minimal tuning.</abstract>
    <!-- Identifiers and access location. -->
    <identifier type="citekey">lakshmi-narayan-etal-2019-exploration</identifier>
    <identifier type="doi">10.18653/v1/S19-1020</identifier>
    <location>
      <url>https://aclanthology.org/S19-1020</url>
    </location>
    <!-- Position within the host volume: issue date and page range. -->
    <part>
      <date>2019-06</date>
      <extent unit="page">
        <start>186</start>
        <end>191</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploration of Noise Strategies in Semi-supervised Named Entity Classification
%A Lakshmi Narayan, Pooja
%A Nagesh, Ajay
%A Surdeanu, Mihai
%Y Mihalcea, Rada
%Y Shutova, Ekaterina
%Y Ku, Lun-Wei
%Y Evang, Kilian
%Y Poria, Soujanya
%S Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM 2019)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F lakshmi-narayan-etal-2019-exploration
%X Noise is inherent in real world datasets and modeling noise is critical during training as it is effective in regularization. Recently, novel semi-supervised deep learning techniques have demonstrated tremendous potential when learning with very limited labeled training data in image processing tasks. A critical aspect of these semi-supervised learning techniques is augmenting the input or the network with noise to be able to learn robust models. While modeling noise is relatively straightforward in continuous domains such as image classification, it is not immediately apparent how noise can be modeled in discrete domains such as language. Our work aims to address this gap by exploring different noise strategies for the semi-supervised named entity classification task, including statistical methods such as adding Gaussian noise to input embeddings, and linguistically-inspired ones such as dropping words and replacing words with their synonyms. We compare their performance on two benchmark datasets (OntoNotes and CoNLL) for named entity classification. Our results indicate that noise strategies that are linguistically informed perform at least as well as statistical approaches, while being simpler and requiring minimal tuning.
%R 10.18653/v1/S19-1020
%U https://aclanthology.org/S19-1020
%U https://doi.org/10.18653/v1/S19-1020
%P 186-191
Markdown (Informal)
[Exploration of Noise Strategies in Semi-supervised Named Entity Classification](https://aclanthology.org/S19-1020) (Lakshmi Narayan et al., \*SEM 2019)
ACL