@inproceedings{almodovar-etal-2022-language,
title = "Can Language Models Help in System Security? Investigating Log Anomaly Detection using {BERT}",
author = "Almodovar, Crispin and
Sabrina, Fariza and
Karimi, Sarvnaz and
Azad, Salahuddin",
editor = "Parameswaran, Pradeesh and
Biggs, Jennifer and
Powers, David",
booktitle = "Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association",
month = dec,
year = "2022",
address = "Adelaide, Australia",
publisher = "Australasian Language Technology Association",
url = "https://aclanthology.org/2022.alta-1.19",
pages = "139--147",
abstract = "The log files generated by networked computer systems contain valuable information that can be used to monitor system security and stability. Recently, techniques based on Deep Learning and Natural Language Processing have been proven effective in detecting anomalous activities from system logs. The current approaches, however, have limited practical application because they rely on log templates which cannot handle variability in log content, or they require supervised training to be effective. In this paper, a novel log anomaly detection approach named LogFiT is proposed. The LogFiT model inherits the linguistic {``}knowledge{''} encoded within a pretrained BERT-based language model and fine-tunes it towards learning the linguistic structure of system logs. The LogFiT model is trained in a self-supervised manner using normal log data only. Using masked token prediction and centroid distance minimisation as training objectives, the LogFiT model learns to recognise the linguistic patterns associated with the normal log data. During inference, a discriminator function uses the LogFiT model{'}s top-k token prediction accuracy and computed centroid distance to determine if the input is normal or anomaly. Experiments show that LogFiT{'}s F1 score and specificity exceeds that of baseline models on the HDFS dataset and comparable on the BGL dataset.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="almodovar-etal-2022-language">
<titleInfo>
<title>Can Language Models Help in System Security? Investigating Log Anomaly Detection using BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Crispin</namePart>
<namePart type="family">Almodovar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fariza</namePart>
<namePart type="family">Sabrina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarvnaz</namePart>
<namePart type="family">Karimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salahuddin</namePart>
<namePart type="family">Azad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pradeesh</namePart>
<namePart type="family">Parameswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jennifer</namePart>
<namePart type="family">Biggs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Powers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Australasian Language Technology Association</publisher>
<place>
<placeTerm type="text">Adelaide, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The log files generated by networked computer systems contain valuable information that can be used to monitor system security and stability. Recently, techniques based on Deep Learning and Natural Language Processing have been proven effective in detecting anomalous activities from system logs. The current approaches, however, have limited practical application because they rely on log templates, which cannot handle variability in log content, or they require supervised training to be effective. In this paper, a novel log anomaly detection approach named LogFiT is proposed. The LogFiT model inherits the linguistic “knowledge” encoded within a pretrained BERT-based language model and fine-tunes it towards learning the linguistic structure of system logs. The LogFiT model is trained in a self-supervised manner using normal log data only. Using masked token prediction and centroid distance minimisation as training objectives, the LogFiT model learns to recognise the linguistic patterns associated with the normal log data. During inference, a discriminator function uses the LogFiT model’s top-k token prediction accuracy and computed centroid distance to determine whether the input is normal or anomalous. Experiments show that LogFiT’s F1 score and specificity exceed those of baseline models on the HDFS dataset and are comparable on the BGL dataset.</abstract>
<identifier type="citekey">almodovar-etal-2022-language</identifier>
<location>
<url>https://aclanthology.org/2022.alta-1.19</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>139</start>
<end>147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Can Language Models Help in System Security? Investigating Log Anomaly Detection using BERT
%A Almodovar, Crispin
%A Sabrina, Fariza
%A Karimi, Sarvnaz
%A Azad, Salahuddin
%Y Parameswaran, Pradeesh
%Y Biggs, Jennifer
%Y Powers, David
%S Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association
%D 2022
%8 December
%I Australasian Language Technology Association
%C Adelaide, Australia
%F almodovar-etal-2022-language
%X The log files generated by networked computer systems contain valuable information that can be used to monitor system security and stability. Recently, techniques based on Deep Learning and Natural Language Processing have been proven effective in detecting anomalous activities from system logs. The current approaches, however, have limited practical application because they rely on log templates, which cannot handle variability in log content, or they require supervised training to be effective. In this paper, a novel log anomaly detection approach named LogFiT is proposed. The LogFiT model inherits the linguistic “knowledge” encoded within a pretrained BERT-based language model and fine-tunes it towards learning the linguistic structure of system logs. The LogFiT model is trained in a self-supervised manner using normal log data only. Using masked token prediction and centroid distance minimisation as training objectives, the LogFiT model learns to recognise the linguistic patterns associated with the normal log data. During inference, a discriminator function uses the LogFiT model’s top-k token prediction accuracy and computed centroid distance to determine whether the input is normal or anomalous. Experiments show that LogFiT’s F1 score and specificity exceed those of baseline models on the HDFS dataset and are comparable on the BGL dataset.
%U https://aclanthology.org/2022.alta-1.19
%P 139-147
Markdown (Informal)
[Can Language Models Help in System Security? Investigating Log Anomaly Detection using BERT](https://aclanthology.org/2022.alta-1.19) (Almodovar et al., ALTA 2022)
ACL
Crispin Almodovar, Fariza Sabrina, Sarvnaz Karimi, and Salahuddin Azad. 2022. [Can Language Models Help in System Security? Investigating Log Anomaly Detection using BERT](https://aclanthology.org/2022.alta-1.19). In *Proceedings of the 20th Annual Workshop of the Australasian Language Technology Association*, pages 139–147, Adelaide, Australia. Australasian Language Technology Association.
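
The abstract above describes LogFiT's inference-time discriminator: a log line is judged by how well the fine-tuned masked language model predicts its masked tokens (top-k accuracy) and by how far its embedding sits from a centroid computed over normal logs. The following is a minimal Python sketch of that scoring idea only; it is not the authors' released code, and the base checkpoint (bert-base-uncased), first-token pooling, mask fraction, thresholds, and function names are illustrative assumptions.

# Illustrative sketch of the scoring idea described in the abstract (top-k masked-token
# prediction accuracy plus centroid distance). Not the authors' LogFiT implementation;
# checkpoint, pooling choice, mask fraction, and thresholds are assumptions.
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")      # assumed BERT-based model
model = AutoModelForMaskedLM.from_pretrained("bert-base-uncased")
model.eval()


def pooled_embedding(log_line: str) -> torch.Tensor:
    """Embed a log line as the final-layer hidden state of its first token (pooling is an assumption)."""
    enc = tokenizer(log_line, return_tensors="pt", truncation=True)
    with torch.no_grad():
        out = model(**enc, output_hidden_states=True)
    return out.hidden_states[-1][0, 0]                              # shape: (hidden_dim,)


def compute_centroid(normal_lines: list[str]) -> torch.Tensor:
    """Centroid of embeddings of normal training logs: the reference point for 'normal'."""
    return torch.stack([pooled_embedding(line) for line in normal_lines]).mean(dim=0)


def top_k_accuracy(log_line: str, k: int = 5, mask_fraction: float = 0.15) -> float:
    """Mask a fraction of tokens and measure how often the true token is in the model's top-k guesses."""
    enc = tokenizer(log_line, return_tensors="pt", truncation=True)
    input_ids = enc["input_ids"].clone()
    candidates = torch.arange(1, input_ids.size(1) - 1)             # skip [CLS]/[SEP]
    n_mask = max(1, int(mask_fraction * len(candidates)))
    positions = candidates[torch.randperm(len(candidates))[:n_mask]]
    true_ids = input_ids[0, positions].clone()
    input_ids[0, positions] = tokenizer.mask_token_id
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=enc["attention_mask"]).logits
    top_k = logits[0, positions].topk(k, dim=-1).indices            # shape: (n_mask, k)
    hits = (top_k == true_ids.unsqueeze(-1)).any(dim=-1).float()
    return hits.mean().item()


def is_anomaly(log_line: str, centroid: torch.Tensor,
               acc_threshold: float = 0.8, dist_threshold: float = 5.0) -> bool:
    """Flag a line whose tokens are hard to predict or whose embedding is far from the normal centroid."""
    far_from_normal = torch.norm(pooled_embedding(log_line) - centroid).item() > dist_threshold
    return top_k_accuracy(log_line) < acc_threshold or far_from_normal

In the approach the abstract describes, the language model would first be fine-tuned on normal logs with the masked-token-prediction and centroid-distance-minimisation objectives; values such as acc_threshold and dist_threshold above stand in for a decision rule that would be calibrated on held-out normal data.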