@inproceedings{abouzakhar-etal-2008-unsupervised,
title = "Unsupervised Learning-based Anomalous {A}rabic Text Detection",
author = "Abouzakhar, Nasser and
Allison, Ben and
Guthrie, Louise",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf",
abstract = "The growing dependence of modern society on the Web as a vital source of information and communication has become inevitable. However, the Web has become an ideal channel for various terrorist organisations to publish their misleading information and send unintelligible messages to communicate with their clients as well. The increase in the number of published anomalous misleading information on the Web has led to an increase in security threats. The existing Web security mechanisms and protocols are not appropriately designed to deal with such recently developed problems. Developing technology to detect anomalous textual information has become one of the major challenges within the NLP community. This paper introduces the problem of anomalous text detection by automatically extracting linguistic features from documents and evaluating those features for patterns of suspicious and/or inconsistent information in Arabic documents. In order to achieve that, we defined specific linguistic features that characterise various Arabic writing styles. Also, the paper introduces the main challenges in Arabic processing and describes the proposed unsupervised learning model for detecting anomalous Arabic textual information.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abouzakhar-etal-2008-unsupervised">
<titleInfo>
<title>Unsupervised Learning-based Anomalous Arabic Text Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nasser</namePart>
<namePart type="family">Abouzakhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Allison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louise</namePart>
<namePart type="family">Guthrie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The growing dependence of modern society on the Web as a vital source of information and communication has become inevitable. However, the Web has become an ideal channel for various terrorist organisations to publish their misleading information and send unintelligible messages to communicate with their clients as well. The increase in the number of published anomalous misleading information on the Web has led to an increase in security threats. The existing Web security mechanisms and protocols are not appropriately designed to deal with such recently developed problems. Developing technology to detect anomalous textual information has become one of the major challenges within the NLP community. This paper introduces the problem of anomalous text detection by automatically extracting linguistic features from documents and evaluating those features for patterns of suspicious and/or inconsistent information in Arabic documents. In order to achieve that, we defined specific linguistic features that characterise various Arabic writing styles. Also, the paper introduces the main challenges in Arabic processing and describes the proposed unsupervised learning model for detecting anomalous Arabic textual information.</abstract>
<identifier type="citekey">abouzakhar-etal-2008-unsupervised</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Learning-based Anomalous Arabic Text Detection
%A Abouzakhar, Nasser
%A Allison, Ben
%A Guthrie, Louise
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F abouzakhar-etal-2008-unsupervised
%X The growing dependence of modern society on the Web as a vital source of information and communication has become inevitable. However, the Web has become an ideal channel for various terrorist organisations to publish their misleading information and send unintelligible messages to communicate with their clients as well. The increase in the number of published anomalous misleading information on the Web has led to an increase in security threats. The existing Web security mechanisms and protocols are not appropriately designed to deal with such recently developed problems. Developing technology to detect anomalous textual information has become one of the major challenges within the NLP community. This paper introduces the problem of anomalous text detection by automatically extracting linguistic features from documents and evaluating those features for patterns of suspicious and/or inconsistent information in Arabic documents. In order to achieve that, we defined specific linguistic features that characterise various Arabic writing styles. Also, the paper introduces the main challenges in Arabic processing and describes the proposed unsupervised learning model for detecting anomalous Arabic textual information.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf
Markdown (Informal)
[Unsupervised Learning-based Anomalous Arabic Text Detection](http://www.lrec-conf.org/proceedings/lrec2008/pdf/83_paper.pdf) (Abouzakhar et al., LREC 2008)
ACL