@inproceedings{mahanti-etal-2019-robust,
title = "Robust Text Classification using Sub-Word Information in Input Word Representations.",
author = "Mahanti, Bhanu Prakash and
Chhipa, Priyank and
Sridhar, Vivek and
Prasan, Vinuthkumar",
editor = "Sharma, Dipti Misra and
Bhattacharyya, Pushpak",
booktitle = "Proceedings of the 16th International Conference on Natural Language Processing",
month = dec,
year = "2019",
address = "International Institute of Information Technology, Hyderabad, India",
publisher = "NLP Association of India",
url = "https://aclanthology.org/2019.icon-1.1",
pages = "1--8",
abstract = "Word based deep learning approaches have been used with increasing success recently to solve Natural Language Processing problems like Machine Translation, Language Modelling and Text Classification. However, performance of these word based models is limited by the vocabulary of the training corpus. Alternate approaches using character based models have been proposed to overcome the unseen word problems arising for a variety of reasons. However, character based models fail to capture the sequential relationship of words inherently present in texts. Hence, there is scope for improvement by addressing the unseen word problem while also maintaining the sequential context through word based models. In this work, we propose a method where the input embedding vector incorporates sub-word information but is also suitable for use with models which successfully capture the sequential nature of text. We further attempt to establish that using such a word representation as input makes the model robust to unseen words, particularly arising due to tokenization and spelling errors, which is a common problem in systems where a typing interface is one of the input modalities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahanti-etal-2019-robust">
<titleInfo>
<title>Robust Text Classification using Sub-Word Information in Input Word Representations.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bhanu</namePart>
<namePart type="given">Prakash</namePart>
<namePart type="family">Mahanti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Priyank</namePart>
<namePart type="family">Chhipa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Sridhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinuthkumar</namePart>
<namePart type="family">Prasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th International Conference on Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India</publisher>
<place>
<placeTerm type="text">International Institute of Information Technology, Hyderabad, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word based deep learning approaches have been used with increasing success recently to solve Natural Language Processing problems like Machine Translation, Language Modelling and Text Classification. However, performance of these word based models is limited by the vocabulary of the training corpus. Alternate approaches using character based models have been proposed to overcome the unseen word problems arising for a variety of reasons. However, character based models fail to capture the sequential relationship of words inherently present in texts. Hence, there is scope for improvement by addressing the unseen word problem while also maintaining the sequential context through word based models. In this work, we propose a method where the input embedding vector incorporates sub-word information but is also suitable for use with models which successfully capture the sequential nature of text. We further attempt to establish that using such a word representation as input makes the model robust to unseen words, particularly arising due to tokenization and spelling errors, which is a common problem in systems where a typing interface is one of the input modalities.</abstract>
<identifier type="citekey">mahanti-etal-2019-robust</identifier>
<location>
<url>https://aclanthology.org/2019.icon-1.1</url>
</location>
<part>
<date>2019-12</date>
<extent unit="page">
<start>1</start>
<end>8</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robust Text Classification using Sub-Word Information in Input Word Representations.
%A Mahanti, Bhanu Prakash
%A Chhipa, Priyank
%A Sridhar, Vivek
%A Prasan, Vinuthkumar
%Y Sharma, Dipti Misra
%Y Bhattacharyya, Pushpak
%S Proceedings of the 16th International Conference on Natural Language Processing
%D 2019
%8 December
%I NLP Association of India
%C International Institute of Information Technology, Hyderabad, India
%F mahanti-etal-2019-robust
%X Word based deep learning approaches have been used with increasing success recently to solve Natural Language Processing problems like Machine Translation, Language Modelling and Text Classification. However, performance of these word based models is limited by the vocabulary of the training corpus. Alternate approaches using character based models have been proposed to overcome the unseen word problems arising for a variety of reasons. However, character based models fail to capture the sequential relationship of words inherently present in texts. Hence, there is scope for improvement by addressing the unseen word problem while also maintaining the sequential context through word based models. In this work, we propose a method where the input embedding vector incorporates sub-word information but is also suitable for use with models which successfully capture the sequential nature of text. We further attempt to establish that using such a word representation as input makes the model robust to unseen words, particularly arising due to tokenization and spelling errors, which is a common problem in systems where a typing interface is one of the input modalities.
%U https://aclanthology.org/2019.icon-1.1
%P 1-8
Markdown (Informal)
[Robust Text Classification using Sub-Word Information in Input Word Representations.](https://aclanthology.org/2019.icon-1.1) (Mahanti et al., ICON 2019)
ACL