@inproceedings{hughes-etal-2020-detecting,
title = "Detecting Trending Terms in Cybersecurity Forum Discussions",
author = "Hughes, Jack and
Aycock, Seth and
Caines, Andrew and
Buttery, Paula and
Hutchings, Alice",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wnut-1.15",
doi = "10.18653/v1/2020.wnut-1.15",
pages = "107--115",
abstract = "We present a lightweight method for identifying currently trending terms in relation to a known prior of terms, using a weighted log-odds ratio with an informative prior. We apply this method to a dataset of posts from an English-language underground hacking forum, spanning over ten years of activity, with posts containing misspellings, orthographic variation, acronyms, and slang. Our statistical approach supports analysis of linguistic change and discussion topics over time, without a requirement to train a topic model for each time interval for analysis. We evaluate the approach by comparing the results to TF-IDF using the discounted cumulative gain metric with human annotations, finding our method outperforms TF-IDF on information retrieval.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hughes-etal-2020-detecting">
<titleInfo>
<title>Detecting Trending Terms in Cybersecurity Forum Discussions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jack</namePart>
<namePart type="family">Hughes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Aycock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Caines</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paula</namePart>
<namePart type="family">Buttery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Hutchings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a lightweight method for identifying currently trending terms in relation to a known prior of terms, using a weighted log-odds ratio with an informative prior. We apply this method to a dataset of posts from an English-language underground hacking forum, spanning over ten years of activity, with posts containing misspellings, orthographic variation, acronyms, and slang. Our statistical approach supports analysis of linguistic change and discussion topics over time, without a requirement to train a topic model for each time interval for analysis. We evaluate the approach by comparing the results to TF-IDF using the discounted cumulative gain metric with human annotations, finding our method outperforms TF-IDF on information retrieval.</abstract>
<identifier type="citekey">hughes-etal-2020-detecting</identifier>
<identifier type="doi">10.18653/v1/2020.wnut-1.15</identifier>
<location>
<url>https://aclanthology.org/2020.wnut-1.15</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>107</start>
<end>115</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Trending Terms in Cybersecurity Forum Discussions
%A Hughes, Jack
%A Aycock, Seth
%A Caines, Andrew
%A Buttery, Paula
%A Hutchings, Alice
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F hughes-etal-2020-detecting
%X We present a lightweight method for identifying currently trending terms in relation to a known prior of terms, using a weighted log-odds ratio with an informative prior. We apply this method to a dataset of posts from an English-language underground hacking forum, spanning over ten years of activity, with posts containing misspellings, orthographic variation, acronyms, and slang. Our statistical approach supports analysis of linguistic change and discussion topics over time, without a requirement to train a topic model for each time interval for analysis. We evaluate the approach by comparing the results to TF-IDF using the discounted cumulative gain metric with human annotations, finding our method outperforms TF-IDF on information retrieval.
%R 10.18653/v1/2020.wnut-1.15
%U https://aclanthology.org/2020.wnut-1.15
%U https://doi.org/10.18653/v1/2020.wnut-1.15
%P 107-115
Markdown (Informal)
[Detecting Trending Terms in Cybersecurity Forum Discussions](https://aclanthology.org/2020.wnut-1.15) (Hughes et al., WNUT 2020)
ACL