BibTeX
@inproceedings{park-you-2023-pretrained,
title = "A Pretrained Language Model for Cyber Threat Intelligence",
author = "Park, Youngja and
You, Weiqiu",
editor = "Wang, Mingxuan and
Zitouni, Imed",
booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.emnlp-industry.12",
doi = "10.18653/v1/2023.emnlp-industry.12",
pages = "113--122",
abstract = "We present a new BERT model for the cybersecurity domain, CTI-BERT, which can improve the accuracy of cyber threat intelligence (CTI) extraction, enabling organizations to better defend against potential cyber threats. We provide detailed information about the domain corpus collection, the training methodology and its effectiveness for a variety of NLP tasks for the cybersecurity domain. The experiments show that CTI-BERT significantly outperforms several general-domain and security-domain models for these cybersecurity applications indicating that the training data and methodology have a significant impact on the model performance.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="park-you-2023-pretrained">
    <titleInfo>
      <title>A Pretrained Language Model for Cyber Threat Intelligence</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Youngja</namePart>
      <namePart type="family">Park</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Weiqiu</namePart>
      <namePart type="family">You</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mingxuan</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Imed</namePart>
        <namePart type="family">Zitouni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present a new BERT model for the cybersecurity domain, CTI-BERT, which can improve the accuracy of cyber threat intelligence (CTI) extraction, enabling organizations to better defend against potential cyber threats. We provide detailed information about the domain corpus collection, the training methodology, and its effectiveness for a variety of NLP tasks in the cybersecurity domain. The experiments show that CTI-BERT significantly outperforms several general-domain and security-domain models for these cybersecurity applications, indicating that the training data and methodology have a significant impact on model performance.</abstract>
    <identifier type="citekey">park-you-2023-pretrained</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-industry.12</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-industry.12</url>
    </location>
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>113</start>
        <end>122</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T A Pretrained Language Model for Cyber Threat Intelligence
%A Park, Youngja
%A You, Weiqiu
%Y Wang, Mingxuan
%Y Zitouni, Imed
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F park-you-2023-pretrained
%X We present a new BERT model for the cybersecurity domain, CTI-BERT, which can improve the accuracy of cyber threat intelligence (CTI) extraction, enabling organizations to better defend against potential cyber threats. We provide detailed information about the domain corpus collection, the training methodology, and its effectiveness for a variety of NLP tasks in the cybersecurity domain. The experiments show that CTI-BERT significantly outperforms several general-domain and security-domain models for these cybersecurity applications, indicating that the training data and methodology have a significant impact on model performance.
%R 10.18653/v1/2023.emnlp-industry.12
%U https://aclanthology.org/2023.emnlp-industry.12
%U https://doi.org/10.18653/v1/2023.emnlp-industry.12
%P 113-122
Markdown (Informal)
[A Pretrained Language Model for Cyber Threat Intelligence](https://aclanthology.org/2023.emnlp-industry.12) (Park & You, EMNLP 2023)
ACL
Youngja Park and Weiqiu You. 2023. A Pretrained Language Model for Cyber Threat Intelligence. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 113–122, Singapore. Association for Computational Linguistics.
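
For readers who want to try the model the abstract describes, below is a minimal sketch of loading a domain-specific BERT checkpoint with the Hugging Face transformers library and probing it with a fill-mask query. The checkpoint name ibm-research/CTI-BERT is an assumption based on the authors' affiliation, not a name confirmed by this page; substitute the actual published identifier if it differs.

# Minimal sketch: load a domain-specific BERT checkpoint and run a
# fill-mask probe with Hugging Face transformers.
# NOTE: the model id "ibm-research/CTI-BERT" is an assumption based on
# the authors' affiliation; replace it with the actual checkpoint name.
from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline

model_id = "ibm-research/CTI-BERT"  # hypothetical checkpoint name

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)

# A masked-LM probe on security-flavored text; a CTI-trained model
# should rank threat-intelligence vocabulary highly for the [MASK] slot.
fill = pipeline("fill-mask", model=model, tokenizer=tokenizer)
for pred in fill("The attackers used a spear-phishing [MASK] to gain initial access."):
    print(f"{pred['token_str']:>15}  {pred['score']:.3f}")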