@inproceedings{r-etal-2018-teamdl,
title = "{T}eam{DL} at {S}em{E}val-2018 Task 8: Cybersecurity Text Analysis using Convolutional Neural Network and Conditional Random Fields",
author = "R, Manikandan and
Madgula, Krishna and
Saha, Snehanshu",
editor = "Apidianaki, Marianna and
Mohammad, Saif M. and
May, Jonathan and
Shutova, Ekaterina and
Bethard, Steven and
Carpuat, Marine",
booktitle = "Proceedings of the 12th International Workshop on Semantic Evaluation",
month = jun,
year = "2018",
address = "New Orleans, Louisiana",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/S18-1140",
doi = "10.18653/v1/S18-1140",
pages = "868--873",
abstract = "In this work we present our participation to SemEval-2018 Task 8 subtasks 1 {\&} 2 respectively. We developed Convolution Neural Network system for malware sentence classification (subtask 1) and Conditional Random Fields system for malware token label prediction (subtask 2). We experimented with couple of word embedding strategies, feature sets and achieved competitive performance across the two subtasks. For subtask 1 We experimented with two category of word embeddings namely native embeddings and task specific embedding using Word2vec and Glove algorithms. 1. Native Embeddings: All words including the unknown ones that are randomly initialized use embeddings from original Word2vec/Glove models. 2. Task specific : The embeddings are generated by training Word2vec/Glove algorithms on sentences from MalwareTextDB We found that glove outperforms rest of embeddings for subtask 1. For subtask 2, we used N-grams of size 6, previous, next tokens and labels, features giving disjunctions of words anywhere in the left or right, word shape features, word lemma of current, previous and next words, word-tag pair features, POS tags, prefix and suffixes.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="r-etal-2018-teamdl">
<titleInfo>
<title>TeamDL at SemEval-2018 Task 8: Cybersecurity Text Analysis using Convolutional Neural Network and Conditional Random Fields</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manikandan</namePart>
<namePart type="family">R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Krishna</namePart>
<namePart type="family">Madgula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Snehanshu</namePart>
<namePart type="family">Saha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th International Workshop on Semantic Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marine</namePart>
<namePart type="family">Carpuat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">New Orleans, Louisiana</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work we present our participation to SemEval-2018 Task 8 subtasks 1 & 2 respectively. We developed Convolution Neural Network system for malware sentence classification (subtask 1) and Conditional Random Fields system for malware token label prediction (subtask 2). We experimented with couple of word embedding strategies, feature sets and achieved competitive performance across the two subtasks. For subtask 1 We experimented with two category of word embeddings namely native embeddings and task specific embedding using Word2vec and Glove algorithms. 1. Native Embeddings: All words including the unknown ones that are randomly initialized use embeddings from original Word2vec/Glove models. 2. Task specific : The embeddings are generated by training Word2vec/Glove algorithms on sentences from MalwareTextDB We found that glove outperforms rest of embeddings for subtask 1. For subtask 2, we used N-grams of size 6, previous, next tokens and labels, features giving disjunctions of words anywhere in the left or right, word shape features, word lemma of current, previous and next words, word-tag pair features, POS tags, prefix and suffixes.</abstract>
<identifier type="citekey">r-etal-2018-teamdl</identifier>
<identifier type="doi">10.18653/v1/S18-1140</identifier>
<location>
<url>https://aclanthology.org/S18-1140</url>
</location>
<part>
<date>2018-06</date>
<extent unit="page">
<start>868</start>
<end>873</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TeamDL at SemEval-2018 Task 8: Cybersecurity Text Analysis using Convolutional Neural Network and Conditional Random Fields
%A R, Manikandan
%A Madgula, Krishna
%A Saha, Snehanshu
%Y Apidianaki, Marianna
%Y Mohammad, Saif M.
%Y May, Jonathan
%Y Shutova, Ekaterina
%Y Bethard, Steven
%Y Carpuat, Marine
%S Proceedings of the 12th International Workshop on Semantic Evaluation
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F r-etal-2018-teamdl
%X In this work we present our participation to SemEval-2018 Task 8 subtasks 1 & 2 respectively. We developed Convolution Neural Network system for malware sentence classification (subtask 1) and Conditional Random Fields system for malware token label prediction (subtask 2). We experimented with couple of word embedding strategies, feature sets and achieved competitive performance across the two subtasks. For subtask 1 We experimented with two category of word embeddings namely native embeddings and task specific embedding using Word2vec and Glove algorithms. 1. Native Embeddings: All words including the unknown ones that are randomly initialized use embeddings from original Word2vec/Glove models. 2. Task specific : The embeddings are generated by training Word2vec/Glove algorithms on sentences from MalwareTextDB We found that glove outperforms rest of embeddings for subtask 1. For subtask 2, we used N-grams of size 6, previous, next tokens and labels, features giving disjunctions of words anywhere in the left or right, word shape features, word lemma of current, previous and next words, word-tag pair features, POS tags, prefix and suffixes.
%R 10.18653/v1/S18-1140
%U https://aclanthology.org/S18-1140
%U https://doi.org/10.18653/v1/S18-1140
%P 868-873
Markdown (Informal)
[TeamDL at SemEval-2018 Task 8: Cybersecurity Text Analysis using Convolutional Neural Network and Conditional Random Fields](https://aclanthology.org/S18-1140) (R et al., SemEval 2018)
ACL