% Mathur et al., SocialNLP 2018 workshop paper (ACL Anthology W18-3504).
% Proper nouns in the title are brace-protected as whole words so that
% sentence-casing styles keep their capitals without breaking kerning.
@inproceedings{mathur-etal-2018-detecting,
  title     = {Detecting Offensive Tweets in {Hindi}-{English} Code-Switched Language},
  author    = {Mathur, Puneet and
               Shah, Rajiv and
               Sawhney, Ramit and
               Mahata, Debanjan},
  editor    = {Ku, Lun-Wei and
               Li, Cheng-Te},
  booktitle = {Proceedings of the Sixth International Workshop on Natural Language Processing for Social Media},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-3504/},
  doi       = {10.18653/v1/W18-3504},
  pages     = {18--26},
  abstract  = {The exponential rise of social media websites like Twitter, Facebook and Reddit in linguistically diverse geographical regions has led to hybridization of popular native languages with English in an effort to ease communication. The paper focuses on the classification of offensive tweets written in Hinglish language, which is a portmanteau of the Indic language Hindi with the Roman script. The paper introduces a novel tweet dataset, titled Hindi-English Offensive Tweet (HEOT) dataset, consisting of tweets in Hindi-English code switched language split into three classes: non-offensive, abusive and hate-speech. Further, we approach the problem of classification of the tweets in HEOT dataset using transfer learning wherein the proposed model employing Convolutional Neural Networks is pre-trained on tweets in English followed by retraining on Hinglish tweets.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey mathur-etal-2018-detecting; describes the same paper as the BibTeX entry earlier in this file. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mathur-etal-2018-detecting">
<titleInfo>
<title>Detecting Offensive Tweets in Hindi-English Code-Switched Language</title>
</titleInfo>
<!-- Authors, one name element each, in byline order. -->
<name type="personal">
<namePart type="given">Puneet</namePart>
<namePart type="family">Mathur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajiv</namePart>
<namePart type="family">Shah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ramit</namePart>
<namePart type="family">Sawhney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Mahata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing this paper, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Workshop on Natural Language Processing for Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cheng-Te</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The exponential rise of social media websites like Twitter, Facebook and Reddit in linguistically diverse geographical regions has led to hybridization of popular native languages with English in an effort to ease communication. The paper focuses on the classification of offensive tweets written in Hinglish language, which is a portmanteau of the Indic language Hindi with the Roman script. The paper introduces a novel tweet dataset, titled Hindi-English Offensive Tweet (HEOT) dataset, consisting of tweets in Hindi-English code switched language split into three classes: non-offensive, abusive and hate-speech. Further, we approach the problem of classification of the tweets in HEOT dataset using transfer learning wherein the proposed model employing Convolutional Neural Networks is pre-trained on tweets in English followed by retraining on Hinglish tweets.</abstract>
<!-- Identifiers: local citation key, DOI, and landing-page URL. -->
<identifier type="citekey">mathur-etal-2018-detecting</identifier>
<identifier type="doi">10.18653/v1/W18-3504</identifier>
<location>
<url>https://aclanthology.org/W18-3504/</url>
</location>
<!-- Position within the host item: issue date and page range. -->
<part>
<date>2018-07</date>
<extent unit="page">
<start>18</start>
<end>26</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Offensive Tweets in Hindi-English Code-Switched Language
%A Mathur, Puneet
%A Shah, Rajiv
%A Sawhney, Ramit
%A Mahata, Debanjan
%Y Ku, Lun-Wei
%Y Li, Cheng-Te
%S Proceedings of the Sixth International Workshop on Natural Language Processing for Social Media
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F mathur-etal-2018-detecting
%X The exponential rise of social media websites like Twitter, Facebook and Reddit in linguistically diverse geographical regions has led to hybridization of popular native languages with English in an effort to ease communication. The paper focuses on the classification of offensive tweets written in Hinglish language, which is a portmanteau of the Indic language Hindi with the Roman script. The paper introduces a novel tweet dataset, titled Hindi-English Offensive Tweet (HEOT) dataset, consisting of tweets in Hindi-English code switched language split into three classes: non-offensive, abusive and hate-speech. Further, we approach the problem of classification of the tweets in HEOT dataset using transfer learning wherein the proposed model employing Convolutional Neural Networks is pre-trained on tweets in English followed by retraining on Hinglish tweets.
%R 10.18653/v1/W18-3504
%U https://aclanthology.org/W18-3504/
%U https://doi.org/10.18653/v1/W18-3504
%P 18-26
Markdown (Informal)
[Detecting Offensive Tweets in Hindi-English Code-Switched Language](https://aclanthology.org/W18-3504/) (Mathur et al., SocialNLP 2018)
ACL