% ACL Anthology P19-2025 -- BibTeX record (ACL 2019 Student Research Workshop paper).
@inproceedings{srirangam-etal-2019-corpus,
  title     = {Corpus Creation and Analysis for Named Entity Recognition in {Telugu}-{English} Code-Mixed Social Media Data},
  author    = {Srirangam, Vamshi Krishna and
               Reddy, Appidi Abhinav and
               Singh, Vinay and
               Shrivastava, Manish},
  editor    = {Alva-Manchego, Fernando and
               Choi, Eunsol and
               Khashabi, Daniel},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop},
  month     = jul,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/P19-2025},
  doi       = {10.18653/v1/P19-2025},
  pages     = {183--189},
  abstract  = {Named Entity Recognition(NER) is one of the important tasks in Natural Language Processing(NLP) and also is a subtask of Information Extraction. In this paper we present our work on NER in Telugu-English code-mixed social media data. Code-Mixing, a progeny of multilingualism is a way in which multilingual people express themselves on social media by using linguistics units from different languages within a sentence or speech context. Entity Extraction from social media data such as tweets(twitter) is in general difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. We present a Telugu-English code-mixed corpus with the corresponding named entity tags. The named entities used to tag data are Person({`}Per{'}), Organization({`}Org{'}) and Location({`}Loc{'}). We experimented with the machine learning models Conditional Random Fields(CRFs), Decision Trees and BiLSTMs on our corpus which resulted in a F1-score of 0.96, 0.94 and 0.95 respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey srirangam-etal-2019-corpus (one paper inside its host proceedings). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="srirangam-etal-2019-corpus">
    <titleInfo>
      <title>Corpus Creation and Analysis for Named Entity Recognition in Telugu-English Code-Mixed Social Media Data</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Vamshi</namePart>
      <namePart type="given">Krishna</namePart>
      <namePart type="family">Srirangam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Appidi</namePart>
      <namePart type="given">Abhinav</namePart>
      <namePart type="family">Reddy</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vinay</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Manish</namePart>
      <namePart type="family">Shrivastava</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Fernando</namePart>
        <namePart type="family">Alva-Manchego</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eunsol</namePart>
        <namePart type="family">Choi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Khashabi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Named Entity Recognition(NER) is one of the important tasks in Natural Language Processing(NLP) and also is a subtask of Information Extraction. In this paper we present our work on NER in Telugu-English code-mixed social media data. Code-Mixing, a progeny of multilingualism is a way in which multilingual people express themselves on social media by using linguistics units from different languages within a sentence or speech context. Entity Extraction from social media data such as tweets(twitter) is in general difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. We present a Telugu-English code-mixed corpus with the corresponding named entity tags. The named entities used to tag data are Person(‘Per’), Organization(‘Org’) and Location(‘Loc’). We experimented with the machine learning models Conditional Random Fields(CRFs), Decision Trees and BiLSTMs on our corpus which resulted in a F1-score of 0.96, 0.94 and 0.95 respectively.</abstract>
    <!-- Identifiers and landing page. -->
    <identifier type="citekey">srirangam-etal-2019-corpus</identifier>
    <identifier type="doi">10.18653/v1/P19-2025</identifier>
    <location>
      <url>https://aclanthology.org/P19-2025</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2019-07</date>
      <extent unit="page">
        <start>183</start>
        <end>189</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Creation and Analysis for Named Entity Recognition in Telugu-English Code-Mixed Social Media Data
%A Srirangam, Vamshi Krishna
%A Reddy, Appidi Abhinav
%A Singh, Vinay
%A Shrivastava, Manish
%Y Alva-Manchego, Fernando
%Y Choi, Eunsol
%Y Khashabi, Daniel
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F srirangam-etal-2019-corpus
%X Named Entity Recognition(NER) is one of the important tasks in Natural Language Processing(NLP) and also is a subtask of Information Extraction. In this paper we present our work on NER in Telugu-English code-mixed social media data. Code-Mixing, a progeny of multilingualism is a way in which multilingual people express themselves on social media by using linguistics units from different languages within a sentence or speech context. Entity Extraction from social media data such as tweets(twitter) is in general difficult due to its informal nature, code-mixed data further complicates the problem due to its informal, unstructured and incomplete information. We present a Telugu-English code-mixed corpus with the corresponding named entity tags. The named entities used to tag data are Person(‘Per’), Organization(‘Org’) and Location(‘Loc’). We experimented with the machine learning models Conditional Random Fields(CRFs), Decision Trees and BiLSTMs on our corpus which resulted in a F1-score of 0.96, 0.94 and 0.95 respectively.
%R 10.18653/v1/P19-2025
%U https://aclanthology.org/P19-2025
%U https://doi.org/10.18653/v1/P19-2025
%P 183-189
Markdown (Informal)
[Corpus Creation and Analysis for Named Entity Recognition in Telugu-English Code-Mixed Social Media Data](https://aclanthology.org/P19-2025) (Srirangam et al., ACL 2019)
ACL