@inproceedings{mandal-singh-2018-language,
title = "Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture",
author = "Mandal, Soumil and
Singh, Anil Kumar",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the 2018 {EMNLP} Workshop W-{NUT}: The 4th Workshop on Noisy User-generated Text",
month = nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-6116",
doi = "10.18653/v1/W18-6116",
pages = "116--120",
abstract = "An accurate language identification tool is an absolute necessity for building complex NLP systems to be used on code-mixed data. Lot of work has been recently done on the same, but there{'}s still room for improvement. Inspired from the recent advancements in neural network architectures for computer vision tasks, we have implemented multichannel neural networks combining CNN and LSTM for word level language identification of code-mixed data. Combining this with a Bi-LSTM-CRF context capture module, accuracies of 93.28{\%} and 93.32{\%} is achieved on our two testing sets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mandal-singh-2018-language">
<titleInfo>
<title>Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture</title>
</titleInfo>
<name type="personal">
<namePart type="given">Soumil</namePart>
<namePart type="family">Mandal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anil</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An accurate language identification tool is an absolute necessity for building complex NLP systems to be used on code-mixed data. Lot of work has been recently done on the same, but there’s still room for improvement. Inspired from the recent advancements in neural network architectures for computer vision tasks, we have implemented multichannel neural networks combining CNN and LSTM for word level language identification of code-mixed data. Combining this with a Bi-LSTM-CRF context capture module, accuracies of 93.28% and 93.32% is achieved on our two testing sets.</abstract>
<identifier type="citekey">mandal-singh-2018-language</identifier>
<identifier type="doi">10.18653/v1/W18-6116</identifier>
<location>
<url>https://aclanthology.org/W18-6116</url>
</location>
<part>
<date>2018-11</date>
<extent unit="page">
<start>116</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture
%A Mandal, Soumil
%A Singh, Anil Kumar
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text
%D 2018
%8 November
%I Association for Computational Linguistics
%C Brussels, Belgium
%F mandal-singh-2018-language
%X An accurate language identification tool is an absolute necessity for building complex NLP systems to be used on code-mixed data. Lot of work has been recently done on the same, but there’s still room for improvement. Inspired from the recent advancements in neural network architectures for computer vision tasks, we have implemented multichannel neural networks combining CNN and LSTM for word level language identification of code-mixed data. Combining this with a Bi-LSTM-CRF context capture module, accuracies of 93.28% and 93.32% is achieved on our two testing sets.
%R 10.18653/v1/W18-6116
%U https://aclanthology.org/W18-6116
%U https://doi.org/10.18653/v1/W18-6116
%P 116-120
Markdown (Informal)
[Language Identification in Code-Mixed Data using Multichannel Neural Networks and Context Capture](https://aclanthology.org/W18-6116) (Mandal & Singh, WNUT 2018)
ACL