@inproceedings{bohra-etal-2018-dataset,
title = "A Dataset of {H}indi-{E}nglish Code-Mixed Social Media Text for Hate Speech Detection",
author = "Bohra, Aditya and
Vijay, Deepanshu and
Singh, Vinay and
Akhtar, Syed Sarfaraz and
Shrivastava, Manish",
editor = "Nissim, Malvina and
Patti, Viviana and
Plank, Barbara and
Wagner, Claudia",
booktitle = "Proceedings of the Second Workshop on Computational Modeling of People's Opinions, Personality, and Emotions in Social Media",
month = jun,
year = "2018",
address = "New Orleans, Louisiana, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-1105/",
doi = "10.18653/v1/W18-1105",
pages = "36--41",
abstract = "Hate speech detection in social media texts is an important Natural language Processing task, which has several crucial applications like sentiment analysis, investigating cyberbullying and examining socio-political controversies. While relevant research has been done independently on code-mixed social media texts and hate speech detection, our work is the first attempt in detecting hate speech in Hindi-English code-mixed social media text. In this paper, we analyze the problem of hate speech detection in code-mixed texts and present a Hindi-English code-mixed dataset consisting of tweets posted online on Twitter. The tweets are annotated with the language at word level and the class they belong to (Hate Speech or Normal Speech). We also propose a supervised classification system for detecting hate speech in the text using various character level, word level, and lexicon based features."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bohra-etal-2018-dataset">
<titleInfo>
<title>A Dataset of Hindi-English Code-Mixed Social Media Text for Hate Speech Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Bohra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deepanshu</namePart>
<namePart type="family">Vijay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vinay</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Syed</namePart>
<namePart type="given">Sarfaraz</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviana</namePart>
<namePart type="family">Patti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Wagner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hate speech detection in social media texts is an important Natural language Processing task, which has several crucial applications like sentiment analysis, investigating cyberbullying and examining socio-political controversies. While relevant research has been done independently on code-mixed social media texts and hate speech detection, our work is the first attempt in detecting hate speech in Hindi-English code-mixed social media text. In this paper, we analyze the problem of hate speech detection in code-mixed texts and present a Hindi-English code-mixed dataset consisting of tweets posted online on Twitter. The tweets are annotated with the language at word level and the class they belong to (Hate Speech or Normal Speech). We also propose a supervised classification system for detecting hate speech in the text using various character level, word level, and lexicon based features.</abstract>
<identifier type="citekey">bohra-etal-2018-dataset</identifier>
<identifier type="doi">10.18653/v1/W18-1105</identifier>
<location>
<url>https://aclanthology.org/W18-1105/</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>36</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset of Hindi-English Code-Mixed Social Media Text for Hate Speech Detection
%A Bohra, Aditya
%A Vijay, Deepanshu
%A Singh, Vinay
%A Akhtar, Syed Sarfaraz
%A Shrivastava, Manish
%Y Nissim, Malvina
%Y Patti, Viviana
%Y Plank, Barbara
%Y Wagner, Claudia
%S Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana, USA
%F bohra-etal-2018-dataset
%X Hate speech detection in social media texts is an important Natural language Processing task, which has several crucial applications like sentiment analysis, investigating cyberbullying and examining socio-political controversies. While relevant research has been done independently on code-mixed social media texts and hate speech detection, our work is the first attempt in detecting hate speech in Hindi-English code-mixed social media text. In this paper, we analyze the problem of hate speech detection in code-mixed texts and present a Hindi-English code-mixed dataset consisting of tweets posted online on Twitter. The tweets are annotated with the language at word level and the class they belong to (Hate Speech or Normal Speech). We also propose a supervised classification system for detecting hate speech in the text using various character level, word level, and lexicon based features.
%R 10.18653/v1/W18-1105
%U https://aclanthology.org/W18-1105/
%U https://doi.org/10.18653/v1/W18-1105
%P 36-41
Markdown (Informal)
[A Dataset of Hindi-English Code-Mixed Social Media Text for Hate Speech Detection](https://aclanthology.org/W18-1105/) (Bohra et al., PEOPLES 2018)
ACL
- Aditya Bohra, Deepanshu Vijay, Vinay Singh, Syed Sarfaraz Akhtar, and Manish Shrivastava. 2018. A Dataset of Hindi-English Code-Mixed Social Media Text for Hate Speech Detection. In Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media, pages 36–41, New Orleans, Louisiana, USA. Association for Computational Linguistics.